strace.c revision 5d64581e106f47c474707001f924ee15ef22830b
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <limits.h>
48#include <dirent.h>
49
50#ifdef LINUX
51# include <asm/unistd.h>
52# if defined __NR_tkill
53#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
54# else
55   /* kill() may choose arbitrarily the target task of the process group
56      while we later wait on that specific TID.  PID process waits become
57      TID task specific waits for a process under ptrace(2).  */
58#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
59#  define my_tkill(tid, sig) kill((tid), (sig))
60# endif
61#endif
62
63#if defined(IA64) && defined(LINUX)
64# include <asm/ptrace_offsets.h>
65#endif
66
67#ifdef USE_PROCFS
68#include <poll.h>
69#endif
70
71#ifdef SVR4
72#include <sys/stropts.h>
73#ifdef HAVE_MP_PROCFS
74#ifdef HAVE_SYS_UIO_H
75#include <sys/uio.h>
76#endif
77#endif
78#endif
79extern char **environ;
80extern int optind;
81extern char *optarg;
82
83
84int debug = 0, followfork = 0;
85unsigned int ptrace_setoptions = 0;
86/* Which WSTOPSIG(status) value marks syscall traps? */
87static unsigned int syscall_trap_sig = SIGTRAP;
88int dtime = 0, xflag = 0, qflag = 0;
89cflag_t cflag = CFLAG_NONE;
90static int iflag = 0, interactive = 0, pflag_seen = 0, rflag = 0, tflag = 0;
91/*
92 * daemonized_tracer supports -D option.
93 * With this option, strace forks twice.
94 * Unlike normal case, with -D *grandparent* process exec's,
95 * becoming a traced process. Child exits (this prevents traced process
96 * from having children it doesn't expect to have), and grandchild
97 * attaches to grandparent similarly to strace -p PID.
98 * This allows for more transparent interaction in cases
99 * when process and its parent are communicating via signals,
100 * wait() etc. Without -D, strace process gets lodged in between,
101 * disrupting parent<->child link.
102 */
103static bool daemonized_tracer = 0;
104
105/* Sometimes we want to print only succeeding syscalls. */
106int not_failing_only = 0;
107
108/* Show path associated with fd arguments */
109int show_fd_path = 0;
110
111/* are we filtering traces based on paths? */
112int tracing_paths = 0;
113
114static int exit_code = 0;
115static int strace_child = 0;
116static int strace_tracer_pid = 0;
117
118static char *username = NULL;
119static uid_t run_uid;
120static gid_t run_gid;
121
122int acolumn = DEFAULT_ACOLUMN;
123int max_strlen = DEFAULT_STRLEN;
124static char *outfname = NULL;
125static FILE *outf;
126static int curcol;
127static struct tcb **tcbtab;
128static unsigned int nprocs, tcbtabsize;
129static const char *progname;
130
131static int detach(struct tcb *tcp, int sig);
132static int trace(void);
133static void cleanup(void);
134static void interrupt(int sig);
135static sigset_t empty_set, blocked_set;
136
137#ifdef HAVE_SIG_ATOMIC_T
138static volatile sig_atomic_t interrupted;
139#else /* !HAVE_SIG_ATOMIC_T */
140static volatile int interrupted;
141#endif /* !HAVE_SIG_ATOMIC_T */
142
143#ifdef USE_PROCFS
144
145static struct tcb *pfd2tcb(int pfd);
146static void reaper(int sig);
147static void rebuild_pollv(void);
148static struct pollfd *pollv;
149
150#ifndef HAVE_POLLABLE_PROCFS
151
152static void proc_poll_open(void);
153static void proc_poller(int pfd);
154
155struct proc_pollfd {
156	int fd;
157	int revents;
158	int pid;
159};
160
161static int poller_pid;
162static int proc_poll_pipe[2] = { -1, -1 };
163
164#endif /* !HAVE_POLLABLE_PROCFS */
165
166#ifdef HAVE_MP_PROCFS
167#define POLLWANT	POLLWRNORM
168#else
169#define POLLWANT	POLLPRI
170#endif
171#endif /* USE_PROCFS */
172
173static void
174usage(FILE *ofp, int exitval)
175{
176	fprintf(ofp, "\
177usage: strace [-CdDffhiqrtttTvVxxy] [-a column] [-e expr] ... [-o file]\n\
178              [-p pid] ... [-s strsize] [-u username] [-E var=val] ...\n\
179              [-P path] [command [arg ...]]\n\
180   or: strace -c [-D] [-e expr] ... [-O overhead] [-S sortby] [-E var=val] ...\n\
181              [command [arg ...]]\n\
182-c -- count time, calls, and errors for each syscall and report summary\n\
183-C -- like -c but also print regular output while processes are running\n\
184-f -- follow forks, -ff -- with output into separate files\n\
185-F -- attempt to follow vforks, -h -- print help message\n\
186-i -- print instruction pointer at time of syscall\n\
187-q -- suppress messages about attaching, detaching, etc.\n\
188-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
189-T -- print time spent in each syscall, -V -- print version\n\
190-v -- verbose mode: print unabbreviated argv, stat, termio[s], etc. args\n\
191-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
192-y -- print paths associated with file descriptor arguments\n\
193-a column -- alignment COLUMN for printing syscall results (default %d)\n\
194-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
195   options: trace, abbrev, verbose, raw, signal, read, or write\n\
196-o file -- send trace output to FILE instead of stderr\n\
197-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
198-p pid -- trace process with process id PID, may be repeated\n\
199-D -- run tracer process as a detached grandchild, not as parent\n\
200-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
201-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
202-u username -- run command as username handling setuid and/or setgid\n\
203-E var=val -- put var=val in the environment for command\n\
204-E var -- remove var from the environment for command\n\
205-P path -- trace accesses to path\n\
206" /* this is broken, so don't document it
207-z -- print only succeeding syscalls\n\
208  */
209, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
210	exit(exitval);
211}
212
213static void die(void) __attribute__ ((noreturn));
214static void die(void)
215{
216	if (strace_tracer_pid == getpid()) {
217		cflag = 0;
218		cleanup();
219	}
220	exit(1);
221}
222
223static void verror_msg(int err_no, const char *fmt, va_list p)
224{
225	fflush(NULL);
226	fprintf(stderr, "%s: ", progname);
227	vfprintf(stderr, fmt, p);
228	if (err_no)
229		fprintf(stderr, ": %s\n", strerror(err_no));
230	else
231		putc('\n', stderr);
232	fflush(stderr);
233}
234
/*
 * Report a printf-style message on stderr via verror_msg(), without
 * any errno description, and return to the caller.
 */
void error_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	va_end(args);
}
242
/*
 * Report a printf-style message on stderr via verror_msg(), without
 * any errno description, then terminate through die().
 * This function does not return.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list p;

	va_start(p, fmt);
	verror_msg(0, fmt, p);
	/* Pair va_start with va_end before leaving for good, as error_msg() does. */
	va_end(p);
	die();
}
250
/*
 * Report a printf-style message on stderr via verror_msg(), followed by
 * the description of the errno value current at the time of the call,
 * and return to the caller.
 */
void perror_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	va_end(args);
}
258
/*
 * Report a printf-style message on stderr via verror_msg(), followed by
 * the description of the errno value current at the time of the call,
 * then terminate through die().  This function does not return.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list p;

	va_start(p, fmt);
	verror_msg(errno, fmt, p);
	/* Pair va_start with va_end before leaving for good, as perror_msg() does. */
	va_end(p);
	die();
}
266
267#ifdef SVR4
268#ifdef MIPS
/*
 * Do-nothing function.  On SVR4/MIPS builds startup_child() installs it
 * as the SIGINT handler; its body is intentionally empty.
 */
void
foobar()
{
	return;
}
273#endif /* MIPS */
274#endif /* SVR4 */
275
276/* Glue for systems without a MMU that cannot provide fork() */
277#ifdef HAVE_FORK
278# define strace_vforked 0
279#else
280# define strace_vforked 1
281# define fork()         vfork()
282#endif
283
/*
 * Make sure the close-on-exec flag is set on descriptor FD.
 *
 * The current descriptor flags are read first; F_SETFD is only issued
 * when FD_CLOEXEC is not already set.  If the flags cannot be read the
 * process is terminated via perror_msg_and_die().
 */
static void
set_cloexec_flag(int fd)
{
	int cur = fcntl(fd, F_GETFD);

	if (cur < 0) {
		/*
		 * F_GETFD can only fail when fd is bad, which would be
		 * a bug in the caller; abort rather than propagate
		 * the error.
		 */
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Nothing to do when the flag is already present. */
	if ((cur & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, cur | FD_CLOEXEC); /* never fails */
}
305
/*
 * Exchange the real and effective user ids of the process.
 *
 * When strace is a setuid executable, the ids have to be swapped
 * before and after filesystem and process management operations.
 * Nothing is done when both ids are equal, and the whole function is
 * a no-op on SVR4.  A failing setreuid() terminates the process.
 */
static void
swap_uid(void)
{
#ifndef SVR4
	int real = getuid();
	int effective = geteuid();

	if (effective == real)
		return;

	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
#endif
}
321
322#if _LFS64_LARGEFILE
323# define fopen_for_output fopen64
324#else
325# define fopen_for_output fopen
326#endif
327
/*
 * Open PATH for writing as an output file and return its stream.
 *
 * The open itself is bracketed by swap_uid() calls, and the resulting
 * descriptor is marked close-on-exec.  Failure to open is fatal, so the
 * function never returns NULL.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
341
342static int popen_pid = 0;
343
344#ifndef _PATH_BSHELL
345# define _PATH_BSHELL "/bin/sh"
346#endif
347
348/*
349 * We cannot use standard popen(3) here because we have to distinguish
350 * popen child process from other processes we trace, and standard popen(3)
351 * does not export its child's pid.
352 */
353static FILE *
354strace_popen(const char *command)
355{
356	FILE *fp;
357	int fds[2];
358
359	swap_uid();
360	if (pipe(fds) < 0)
361		perror_msg_and_die("pipe");
362
363	set_cloexec_flag(fds[1]); /* never fails */
364
365	popen_pid = vfork();
366	if (popen_pid == -1)
367		perror_msg_and_die("vfork");
368
369	if (popen_pid == 0) {
370		/* child */
371		close(fds[1]);
372		if (fds[0] != 0) {
373			if (dup2(fds[0], 0))
374				perror_msg_and_die("dup2");
375			close(fds[0]);
376		}
377		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
378		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
379	}
380
381	/* parent */
382	close(fds[0]);
383	swap_uid();
384	fp = fdopen(fds[1], "w");
385	if (!fp)
386		error_msg_and_die("Out of memory");
387	return fp;
388}
389
390static void
391newoutf(struct tcb *tcp)
392{
393	if (outfname && followfork > 1) {
394		char name[520 + sizeof(int) * 3];
395		sprintf(name, "%.512s.%u", outfname, tcp->pid);
396		tcp->outf = strace_fopen(name);
397	}
398}
399
400static void
401startup_attach(void)
402{
403	int tcbi;
404	struct tcb *tcp;
405
406	/*
407	 * Block user interruptions as we would leave the traced
408	 * process stopped (process state T) if we would terminate in
409	 * between PTRACE_ATTACH and wait4 () on SIGSTOP.
410	 * We rely on cleanup() from this point on.
411	 */
412	if (interactive)
413		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
414
415	if (daemonized_tracer) {
416		pid_t pid = fork();
417		if (pid < 0) {
418			_exit(1);
419		}
420		if (pid) { /* parent */
421			/*
422			 * Wait for grandchild to attach to straced process
423			 * (grandparent). Grandchild SIGKILLs us after it attached.
424			 * Grandparent's wait() is unblocked by our death,
425			 * it proceeds to exec the straced program.
426			 */
427			pause();
428			_exit(0); /* paranoia */
429		}
430		/* grandchild */
431		/* We will be the tracer process. Remember our new pid: */
432		strace_tracer_pid = getpid();
433	}
434
435	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
436		tcp = tcbtab[tcbi];
437
438		if (!(tcp->flags & TCB_INUSE) || !(tcp->flags & TCB_ATTACHED))
439			continue;
440#ifdef LINUX
441		if (tcp->flags & TCB_ATTACH_DONE)
442			continue;
443#endif
444		/* Reinitialize the output since it may have changed. */
445		tcp->outf = outf;
446		newoutf(tcp);
447
448#ifdef USE_PROCFS
449		if (proc_open(tcp, 1) < 0) {
450			fprintf(stderr, "trouble opening proc file\n");
451			droptcb(tcp);
452			continue;
453		}
454#else /* !USE_PROCFS */
455# ifdef LINUX
456		if (followfork && !daemonized_tracer) {
457			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
458			DIR *dir;
459
460			sprintf(procdir, "/proc/%d/task", tcp->pid);
461			dir = opendir(procdir);
462			if (dir != NULL) {
463				unsigned int ntid = 0, nerr = 0;
464				struct dirent *de;
465				int tid;
466				while ((de = readdir(dir)) != NULL) {
467					if (de->d_fileno == 0)
468						continue;
469					tid = atoi(de->d_name);
470					if (tid <= 0)
471						continue;
472					++ntid;
473					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0) {
474						++nerr;
475						if (debug)
476							fprintf(stderr, "attach to pid %d failed\n", tid);
477					}
478					else {
479						if (debug)
480							fprintf(stderr, "attach to pid %d succeeded\n", tid);
481						if (tid != tcp->pid) {
482							struct tcb *new_tcp = alloctcb(tid);
483							new_tcp->flags |= TCB_ATTACHED|TCB_ATTACH_DONE;
484						}
485					}
486					if (interactive) {
487						sigprocmask(SIG_SETMASK, &empty_set, NULL);
488						if (interrupted)
489							goto ret;
490						sigprocmask(SIG_BLOCK, &blocked_set, NULL);
491					}
492				}
493				closedir(dir);
494				ntid -= nerr;
495				if (ntid == 0) {
496					perror("attach: ptrace(PTRACE_ATTACH, ...)");
497					droptcb(tcp);
498					continue;
499				}
500				if (!qflag) {
501					fprintf(stderr, ntid > 1
502? "Process %u attached with %u threads - interrupt to quit\n"
503: "Process %u attached - interrupt to quit\n",
504						tcp->pid, ntid);
505				}
506				continue;
507			} /* if (opendir worked) */
508		} /* if (-f) */
509# endif /* LINUX */
510		if (ptrace(PTRACE_ATTACH, tcp->pid, (char *) 1, 0) < 0) {
511			perror("attach: ptrace(PTRACE_ATTACH, ...)");
512			droptcb(tcp);
513			continue;
514		}
515		if (debug)
516			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);
517
518		if (daemonized_tracer) {
519			/*
520			 * It is our grandparent we trace, not a -p PID.
521			 * Don't want to just detach on exit, so...
522			 */
523			tcp->flags &= ~TCB_ATTACHED;
524			/*
525			 * Make parent go away.
526			 * Also makes grandparent's wait() unblock.
527			 */
528			kill(getppid(), SIGKILL);
529		}
530
531#endif /* !USE_PROCFS */
532		if (!qflag)
533			fprintf(stderr,
534				"Process %u attached - interrupt to quit\n",
535				tcp->pid);
536	} /* for each tcbtab[] */
537
538 ret:
539#ifdef LINUX
540	/* TCB_ATTACH_DONE flag is used only in this function */
541	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
542		tcp = tcbtab[tcbi];
543		tcp->flags &= ~TCB_ATTACH_DONE;
544	}
545#endif
546
547	if (interactive)
548		sigprocmask(SIG_SETMASK, &empty_set, NULL);
549}
550
551static void
552startup_child(char **argv)
553{
554	struct stat statbuf;
555	const char *filename;
556	char pathname[MAXPATHLEN];
557	int pid = 0;
558	struct tcb *tcp;
559
560	filename = argv[0];
561	if (strchr(filename, '/')) {
562		if (strlen(filename) > sizeof pathname - 1) {
563			errno = ENAMETOOLONG;
564			perror_msg_and_die("exec");
565		}
566		strcpy(pathname, filename);
567	}
568#ifdef USE_DEBUGGING_EXEC
569	/*
570	 * Debuggers customarily check the current directory
571	 * first regardless of the path but doing that gives
572	 * security geeks a panic attack.
573	 */
574	else if (stat(filename, &statbuf) == 0)
575		strcpy(pathname, filename);
576#endif /* USE_DEBUGGING_EXEC */
577	else {
578		const char *path;
579		int m, n, len;
580
581		for (path = getenv("PATH"); path && *path; path += m) {
582			if (strchr(path, ':')) {
583				n = strchr(path, ':') - path;
584				m = n + 1;
585			}
586			else
587				m = n = strlen(path);
588			if (n == 0) {
589				if (!getcwd(pathname, MAXPATHLEN))
590					continue;
591				len = strlen(pathname);
592			}
593			else if (n > sizeof pathname - 1)
594				continue;
595			else {
596				strncpy(pathname, path, n);
597				len = n;
598			}
599			if (len && pathname[len - 1] != '/')
600				pathname[len++] = '/';
601			strcpy(pathname + len, filename);
602			if (stat(pathname, &statbuf) == 0 &&
603			    /* Accept only regular files
604			       with some execute bits set.
605			       XXX not perfect, might still fail */
606			    S_ISREG(statbuf.st_mode) &&
607			    (statbuf.st_mode & 0111))
608				break;
609		}
610	}
611	if (stat(pathname, &statbuf) < 0) {
612		perror_msg_and_die("Can't stat '%s'", filename);
613	}
614	strace_child = pid = fork();
615	if (pid < 0) {
616		perror_msg_and_die("fork");
617	}
618	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
619	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
620	) {
621		pid = getpid();
622#ifdef USE_PROCFS
623		if (outf != stderr) close(fileno(outf));
624#ifdef MIPS
625		/* Kludge for SGI, see proc_open for details. */
626		sa.sa_handler = foobar;
627		sa.sa_flags = 0;
628		sigemptyset(&sa.sa_mask);
629		sigaction(SIGINT, &sa, NULL);
630#endif /* MIPS */
631#ifndef FREEBSD
632		pause();
633#else /* FREEBSD */
634		kill(pid, SIGSTOP); /* stop HERE */
635#endif /* FREEBSD */
636#else /* !USE_PROCFS */
637		if (outf != stderr)
638			close(fileno(outf));
639
640		if (!daemonized_tracer) {
641			if (ptrace(PTRACE_TRACEME, 0, (char *) 1, 0) < 0) {
642				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
643			}
644			if (debug)
645				kill(pid, SIGSTOP);
646		}
647
648		if (username != NULL || geteuid() == 0) {
649			uid_t run_euid = run_uid;
650			gid_t run_egid = run_gid;
651
652			if (statbuf.st_mode & S_ISUID)
653				run_euid = statbuf.st_uid;
654			if (statbuf.st_mode & S_ISGID)
655				run_egid = statbuf.st_gid;
656
657			/*
658			 * It is important to set groups before we
659			 * lose privileges on setuid.
660			 */
661			if (username != NULL) {
662				if (initgroups(username, run_gid) < 0) {
663					perror_msg_and_die("initgroups");
664				}
665				if (setregid(run_gid, run_egid) < 0) {
666					perror_msg_and_die("setregid");
667				}
668				if (setreuid(run_uid, run_euid) < 0) {
669					perror_msg_and_die("setreuid");
670				}
671			}
672		}
673		else
674			setreuid(run_uid, run_uid);
675
676		if (!daemonized_tracer) {
677			/*
678			 * Induce an immediate stop so that the parent
679			 * will resume us with PTRACE_SYSCALL and display
680			 * this execve call normally.
681			 * Unless of course we're on a no-MMU system where
682			 * we vfork()-ed, so we cannot stop the child.
683			 */
684			if (!strace_vforked)
685				kill(getpid(), SIGSTOP);
686		} else {
687			struct sigaction sv_sigchld;
688			sigaction(SIGCHLD, NULL, &sv_sigchld);
689			/*
690			 * Make sure it is not SIG_IGN, otherwise wait
691			 * will not block.
692			 */
693			signal(SIGCHLD, SIG_DFL);
694			/*
695			 * Wait for grandchild to attach to us.
696			 * It kills child after that, and wait() unblocks.
697			 */
698			alarm(3);
699			wait(NULL);
700			alarm(0);
701			sigaction(SIGCHLD, &sv_sigchld, NULL);
702		}
703#endif /* !USE_PROCFS */
704
705		execv(pathname, argv);
706		perror_msg_and_die("exec");
707	}
708
709	/* We are the tracer.  */
710	/* With -D, we are *child* here, IOW: different pid. Fetch it. */
711	strace_tracer_pid = getpid();
712
713	tcp = alloctcb(daemonized_tracer ? getppid() : pid);
714	if (daemonized_tracer) {
715		/* We want subsequent startup_attach() to attach to it.  */
716		tcp->flags |= TCB_ATTACHED;
717	}
718#ifdef USE_PROCFS
719	if (proc_open(tcp, 0) < 0) {
720		perror_msg_and_die("trouble opening proc file");
721	}
722#endif /* USE_PROCFS */
723}
724
725#ifdef LINUX
/*
 * Send signal SIG to PID with kill(), ignoring its result, and leave
 * errno exactly as it was before the call.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_before = errno;

	kill(pid, sig);
	errno = errno_before;
}
733
734/*
735 * Test whether the kernel support PTRACE_O_TRACECLONE et al options.
736 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
737 * and then see which options are supported by the kernel.
738 */
739static void
740test_ptrace_setoptions_followfork(void)
741{
742	int pid, expected_grandchild = 0, found_grandchild = 0;
743	const unsigned int test_options = PTRACE_O_TRACECLONE |
744					  PTRACE_O_TRACEFORK |
745					  PTRACE_O_TRACEVFORK;
746
747	pid = fork();
748	if (pid < 0)
749		perror_msg_and_die("fork");
750	if (pid == 0) {
751		pid = getpid();
752		if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
753			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
754					   __func__);
755		kill(pid, SIGSTOP);
756		if (fork() < 0)
757			perror_msg_and_die("fork");
758		_exit(0);
759	}
760
761	while (1) {
762		int status, tracee_pid;
763
764		errno = 0;
765		tracee_pid = wait(&status);
766		if (tracee_pid <= 0) {
767			if (errno == EINTR)
768				continue;
769			else if (errno == ECHILD)
770				break;
771			kill_save_errno(pid, SIGKILL);
772			perror_msg_and_die("%s: unexpected wait result %d",
773					   __func__, tracee_pid);
774		}
775		if (WIFEXITED(status)) {
776			if (WEXITSTATUS(status)) {
777				if (tracee_pid != pid)
778					kill_save_errno(pid, SIGKILL);
779				error_msg_and_die("%s: unexpected exit status %u",
780						  __func__, WEXITSTATUS(status));
781			}
782			continue;
783		}
784		if (WIFSIGNALED(status)) {
785			if (tracee_pid != pid)
786				kill_save_errno(pid, SIGKILL);
787			error_msg_and_die("%s: unexpected signal %u",
788					  __func__, WTERMSIG(status));
789		}
790		if (!WIFSTOPPED(status)) {
791			if (tracee_pid != pid)
792				kill_save_errno(tracee_pid, SIGKILL);
793			kill(pid, SIGKILL);
794			error_msg_and_die("%s: unexpected wait status %x",
795					  __func__, status);
796		}
797		if (tracee_pid != pid) {
798			found_grandchild = tracee_pid;
799			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
800				kill_save_errno(tracee_pid, SIGKILL);
801				kill_save_errno(pid, SIGKILL);
802				perror_msg_and_die("PTRACE_CONT doesn't work");
803			}
804			continue;
805		}
806		switch (WSTOPSIG(status)) {
807		case SIGSTOP:
808			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
809			    && errno != EINVAL && errno != EIO)
810				perror_msg("PTRACE_SETOPTIONS");
811			break;
812		case SIGTRAP:
813			if (status >> 16 == PTRACE_EVENT_FORK) {
814				long msg = 0;
815
816				if (ptrace(PTRACE_GETEVENTMSG, pid,
817					   NULL, (long) &msg) == 0)
818					expected_grandchild = msg;
819			}
820			break;
821		}
822		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
823			kill_save_errno(pid, SIGKILL);
824			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
825		}
826	}
827	if (expected_grandchild && expected_grandchild == found_grandchild) {
828		ptrace_setoptions |= test_options;
829		if (debug)
830			fprintf(stderr, "ptrace_setoptions = %#x\n",
831				ptrace_setoptions);
832		return;
833	}
834	error_msg("Test for PTRACE_O_TRACECLONE failed, "
835		  "giving up using this feature.");
836}
837
838/*
839 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
840 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
841 * and then see whether it will stop with (SIGTRAP | 0x80).
842 *
843 * Use of this option enables correct handling of user-generated SIGTRAPs,
844 * and SIGTRAPs generated by special instructions such as int3 on x86:
845 * _start:	.globl	_start
846 *		int3
847 *		movl	$42, %ebx
848 *		movl	$1, %eax
849 *		int	$0x80
850 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
851 */
852static void
853test_ptrace_setoptions_for_all(void)
854{
855	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
856					  PTRACE_O_TRACEEXEC;
857	int pid;
858	int it_worked = 0;
859
860	pid = fork();
861	if (pid < 0)
862		perror_msg_and_die("fork");
863
864	if (pid == 0) {
865		pid = getpid();
866		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
867			/* Note: exits with exitcode 1 */
868			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
869					   __func__);
870		kill(pid, SIGSTOP);
871		_exit(0); /* parent should see entry into this syscall */
872	}
873
874	while (1) {
875		int status, tracee_pid;
876
877		errno = 0;
878		tracee_pid = wait(&status);
879		if (tracee_pid <= 0) {
880			if (errno == EINTR)
881				continue;
882			kill_save_errno(pid, SIGKILL);
883			perror_msg_and_die("%s: unexpected wait result %d",
884					   __func__, tracee_pid);
885		}
886		if (WIFEXITED(status)) {
887			if (WEXITSTATUS(status) == 0)
888				break;
889			error_msg_and_die("%s: unexpected exit status %u",
890					  __func__, WEXITSTATUS(status));
891		}
892		if (WIFSIGNALED(status)) {
893			error_msg_and_die("%s: unexpected signal %u",
894					  __func__, WTERMSIG(status));
895		}
896		if (!WIFSTOPPED(status)) {
897			kill(pid, SIGKILL);
898			error_msg_and_die("%s: unexpected wait status %x",
899					  __func__, status);
900		}
901		if (WSTOPSIG(status) == SIGSTOP) {
902			/*
903			 * We don't check "options aren't accepted" error.
904			 * If it happens, we'll never get (SIGTRAP | 0x80),
905			 * and thus will decide to not use the option.
906			 * IOW: the outcome of the test will be correct.
907			 */
908			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
909			    && errno != EINVAL && errno != EIO)
910				perror_msg("PTRACE_SETOPTIONS");
911		}
912		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
913			it_worked = 1;
914		}
915		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
916			kill_save_errno(pid, SIGKILL);
917			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
918		}
919	}
920
921	if (it_worked) {
922		syscall_trap_sig = (SIGTRAP | 0x80);
923		ptrace_setoptions |= test_options;
924		if (debug)
925			fprintf(stderr, "ptrace_setoptions = %#x\n",
926				ptrace_setoptions);
927		return;
928	}
929
930	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
931		  "giving up using this feature.");
932}
933#endif
934
935int
936main(int argc, char *argv[])
937{
938	struct tcb *tcp;
939	int c, pid = 0;
940	int optF = 0;
941	struct sigaction sa;
942
943	progname = argv[0] ? argv[0] : "strace";
944
945	strace_tracer_pid = getpid();
946
947	/* Allocate the initial tcbtab.  */
948	tcbtabsize = argc;	/* Surely enough for all -p args.  */
949	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
950	if (tcbtab == NULL)
951		error_msg_and_die("Out of memory");
952	tcp = calloc(tcbtabsize, sizeof(*tcp));
953	if (tcp == NULL)
954		error_msg_and_die("Out of memory");
955	for (c = 0; c < tcbtabsize; c++)
956		tcbtab[c] = tcp++;
957
958	outf = stderr;
959	interactive = 1;
960	set_sortby(DEFAULT_SORTBY);
961	set_personality(DEFAULT_PERSONALITY);
962	qualify("trace=all");
963	qualify("abbrev=all");
964	qualify("verbose=all");
965	qualify("signal=all");
966	while ((c = getopt(argc, argv,
967		"+cCdfFhiqrtTvVxyz"
968#ifndef USE_PROCFS
969		"D"
970#endif
971		"a:e:o:O:p:s:S:u:E:P:")) != EOF) {
972		switch (c) {
973		case 'c':
974			if (cflag == CFLAG_BOTH) {
975				error_msg_and_die("-c and -C are mutually exclusive options");
976			}
977			cflag = CFLAG_ONLY_STATS;
978			break;
979		case 'C':
980			if (cflag == CFLAG_ONLY_STATS) {
981				error_msg_and_die("-c and -C are mutually exclusive options");
982			}
983			cflag = CFLAG_BOTH;
984			break;
985		case 'd':
986			debug++;
987			break;
988#ifndef USE_PROCFS
989		case 'D':
990			daemonized_tracer = 1;
991			break;
992#endif
993		case 'F':
994			optF = 1;
995			break;
996		case 'f':
997			followfork++;
998			break;
999		case 'h':
1000			usage(stdout, 0);
1001			break;
1002		case 'i':
1003			iflag++;
1004			break;
1005		case 'q':
1006			qflag++;
1007			break;
1008		case 'r':
1009			rflag++;
1010			tflag++;
1011			break;
1012		case 't':
1013			tflag++;
1014			break;
1015		case 'T':
1016			dtime++;
1017			break;
1018		case 'x':
1019			xflag++;
1020			break;
1021		case 'y':
1022			show_fd_path = 1;
1023			break;
1024		case 'v':
1025			qualify("abbrev=none");
1026			break;
1027		case 'V':
1028			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1029			exit(0);
1030			break;
1031		case 'z':
1032			not_failing_only = 1;
1033			break;
1034		case 'a':
1035			acolumn = atoi(optarg);
1036			break;
1037		case 'e':
1038			qualify(optarg);
1039			break;
1040		case 'o':
1041			outfname = strdup(optarg);
1042			break;
1043		case 'O':
1044			set_overhead(atoi(optarg));
1045			break;
1046		case 'p':
1047			pid = atoi(optarg);
1048			if (pid <= 0) {
1049				error_msg("Invalid process id: '%s'", optarg);
1050				break;
1051			}
1052			if (pid == strace_tracer_pid) {
1053				error_msg("I'm sorry, I can't let you do that, Dave.");
1054				break;
1055			}
1056			tcp = alloc_tcb(pid, 0);
1057			tcp->flags |= TCB_ATTACHED;
1058			pflag_seen++;
1059			break;
1060		case 'P':
1061			tracing_paths = 1;
1062			if (pathtrace_select(optarg)) {
1063				error_msg_and_die("Failed to select path '%s'", optarg);
1064			}
1065			break;
1066		case 's':
1067			max_strlen = atoi(optarg);
1068			if (max_strlen < 0) {
1069				error_msg_and_die("Invalid -s argument: '%s'", optarg);
1070			}
1071			break;
1072		case 'S':
1073			set_sortby(optarg);
1074			break;
1075		case 'u':
1076			username = strdup(optarg);
1077			break;
1078		case 'E':
1079			if (putenv(optarg) < 0) {
1080				error_msg_and_die("Out of memory");
1081			}
1082			break;
1083		default:
1084			usage(stderr, 1);
1085			break;
1086		}
1087	}
1088
1089	if ((optind == argc) == !pflag_seen)
1090		usage(stderr, 1);
1091
1092	if (pflag_seen && daemonized_tracer) {
1093		error_msg_and_die("-D and -p are mutually exclusive options");
1094	}
1095
1096	if (!followfork)
1097		followfork = optF;
1098
1099	if (followfork > 1 && cflag) {
1100		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1101	}
1102
1103	/* See if they want to run as another user. */
1104	if (username != NULL) {
1105		struct passwd *pent;
1106
1107		if (getuid() != 0 || geteuid() != 0) {
1108			error_msg_and_die("You must be root to use the -u option");
1109		}
1110		pent = getpwnam(username);
1111		if (pent == NULL) {
1112			error_msg_and_die("Cannot find user '%s'", username);
1113		}
1114		run_uid = pent->pw_uid;
1115		run_gid = pent->pw_gid;
1116	}
1117	else {
1118		run_uid = getuid();
1119		run_gid = getgid();
1120	}
1121
1122#ifdef LINUX
1123	if (followfork)
1124		test_ptrace_setoptions_followfork();
1125	test_ptrace_setoptions_for_all();
1126#endif
1127
1128	/* Check if they want to redirect the output. */
1129	if (outfname) {
1130		/* See if they want to pipe the output. */
1131		if (outfname[0] == '|' || outfname[0] == '!') {
1132			/*
1133			 * We can't do the <outfname>.PID funny business
1134			 * when using popen, so prohibit it.
1135			 */
1136			if (followfork > 1)
1137				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1138			outf = strace_popen(outfname + 1);
1139		}
1140		else if (followfork <= 1)
1141			outf = strace_fopen(outfname);
1142	}
1143
1144	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1145		static char buf[BUFSIZ];
1146		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1147	}
1148	if (outfname && optind < argc) {
1149		interactive = 0;
1150		qflag = 1;
1151	}
1152
1153	/* Valid states here:
1154	   optind < argc	pflag_seen	outfname	interactive
1155	   1			0		0		1
1156	   0			1		0		1
1157	   1			0		1		0
1158	   0			1		1		1
1159	 */
1160
1161	/* STARTUP_CHILD must be called before the signal handlers get
1162	   installed below as they are inherited into the spawned process.
1163	   Also we do not need to be protected by them as during interruption
1164	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1165	if (!pflag_seen)
1166		startup_child(&argv[optind]);
1167
1168	sigemptyset(&empty_set);
1169	sigemptyset(&blocked_set);
1170	sa.sa_handler = SIG_IGN;
1171	sigemptyset(&sa.sa_mask);
1172	sa.sa_flags = 0;
1173	sigaction(SIGTTOU, &sa, NULL);
1174	sigaction(SIGTTIN, &sa, NULL);
1175	if (interactive) {
1176		sigaddset(&blocked_set, SIGHUP);
1177		sigaddset(&blocked_set, SIGINT);
1178		sigaddset(&blocked_set, SIGQUIT);
1179		sigaddset(&blocked_set, SIGPIPE);
1180		sigaddset(&blocked_set, SIGTERM);
1181		sa.sa_handler = interrupt;
1182#ifdef SUNOS4
1183		/* POSIX signals on sunos4.1 are a little broken. */
1184		sa.sa_flags = SA_INTERRUPT;
1185#endif /* SUNOS4 */
1186	}
1187	sigaction(SIGHUP, &sa, NULL);
1188	sigaction(SIGINT, &sa, NULL);
1189	sigaction(SIGQUIT, &sa, NULL);
1190	sigaction(SIGPIPE, &sa, NULL);
1191	sigaction(SIGTERM, &sa, NULL);
1192#ifdef USE_PROCFS
1193	sa.sa_handler = reaper;
1194	sigaction(SIGCHLD, &sa, NULL);
1195#else
1196	/* Make sure SIGCHLD has the default action so that waitpid
1197	   definitely works without losing track of children.  The user
1198	   should not have given us a bogus state to inherit, but he might
1199	   have.  Arguably we should detect SIG_IGN here and pass it on
1200	   to children, but probably noone really needs that.  */
1201	sa.sa_handler = SIG_DFL;
1202	sigaction(SIGCHLD, &sa, NULL);
1203#endif /* USE_PROCFS */
1204
1205	if (pflag_seen || daemonized_tracer)
1206		startup_attach();
1207
1208	if (trace() < 0)
1209		exit(1);
1210	cleanup();
1211	fflush(NULL);
1212	if (exit_code > 0xff) {
1213		/* Child was killed by a signal, mimic that.  */
1214		exit_code &= 0xff;
1215		signal(exit_code, SIG_DFL);
1216		raise(exit_code);
1217		/* Paranoia - what if this signal is not fatal?
1218		   Exit with 128 + signo then.  */
1219		exit_code += 128;
1220	}
1221	exit(exit_code);
1222}
1223
1224static void
1225expand_tcbtab(void)
1226{
1227	/* Allocate some more TCBs and expand the table.
1228	   We don't want to relocate the TCBs because our
1229	   callers have pointers and it would be a pain.
1230	   So tcbtab is a table of pointers.  Since we never
1231	   free the TCBs, we allocate a single chunk of many.  */
1232	int i = tcbtabsize;
1233	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1234	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1235	if (newtab == NULL || newtcbs == NULL)
1236		error_msg_and_die("expand_tcbtab: out of memory");
1237	tcbtabsize *= 2;
1238	tcbtab = newtab;
1239	while (i < tcbtabsize)
1240		tcbtab[i++] = newtcbs++;
1241}
1242
1243struct tcb *
1244alloc_tcb(int pid, int command_options_parsed)
1245{
1246	int i;
1247	struct tcb *tcp;
1248
1249	if (nprocs == tcbtabsize)
1250		expand_tcbtab();
1251
1252	for (i = 0; i < tcbtabsize; i++) {
1253		tcp = tcbtab[i];
1254		if ((tcp->flags & TCB_INUSE) == 0) {
1255			memset(tcp, 0, sizeof(*tcp));
1256			tcp->pid = pid;
1257			tcp->flags = TCB_INUSE | TCB_STARTUP;
1258			tcp->outf = outf; /* Initialise to current out file */
1259			tcp->pfd = -1;
1260			nprocs++;
1261			if (debug)
1262				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1263			if (command_options_parsed)
1264				newoutf(tcp);
1265			return tcp;
1266		}
1267	}
1268	error_msg_and_die("bug in alloc_tcb");
1269}
1270
1271#ifdef USE_PROCFS
1272int
1273proc_open(struct tcb *tcp, int attaching)
1274{
1275	char proc[32];
1276	long arg;
1277#ifdef SVR4
1278	int i;
1279	sysset_t syscalls;
1280	sigset_t signals;
1281	fltset_t faults;
1282#endif
1283#ifndef HAVE_POLLABLE_PROCFS
1284	static int last_pfd;
1285#endif
1286
1287#ifdef HAVE_MP_PROCFS
1288	/* Open the process pseudo-files in /proc. */
1289	sprintf(proc, "/proc/%d/ctl", tcp->pid);
1290	tcp->pfd = open(proc, O_WRONLY|O_EXCL);
1291	if (tcp->pfd < 0) {
1292		perror("strace: open(\"/proc/...\", ...)");
1293		return -1;
1294	}
1295	set_cloexec_flag(tcp->pfd);
1296	sprintf(proc, "/proc/%d/status", tcp->pid);
1297	tcp->pfd_stat = open(proc, O_RDONLY|O_EXCL);
1298	if (tcp->pfd_stat < 0) {
1299		perror("strace: open(\"/proc/...\", ...)");
1300		return -1;
1301	}
1302	set_cloexec_flag(tcp->pfd_stat);
1303	sprintf(proc, "/proc/%d/as", tcp->pid);
1304	tcp->pfd_as = open(proc, O_RDONLY|O_EXCL);
1305	if (tcp->pfd_as < 0) {
1306		perror("strace: open(\"/proc/...\", ...)");
1307		return -1;
1308	}
1309	set_cloexec_flag(tcp->pfd_as);
1310#else
1311	/* Open the process pseudo-file in /proc. */
1312#ifndef FREEBSD
1313	sprintf(proc, "/proc/%d", tcp->pid);
1314	tcp->pfd = open(proc, O_RDWR|O_EXCL);
1315#else /* FREEBSD */
1316	sprintf(proc, "/proc/%d/mem", tcp->pid);
1317	tcp->pfd = open(proc, O_RDWR);
1318#endif /* FREEBSD */
1319	if (tcp->pfd < 0) {
1320		perror("strace: open(\"/proc/...\", ...)");
1321		return -1;
1322	}
1323	set_cloexec_flag(tcp->pfd);
1324#endif
1325#ifdef FREEBSD
1326	sprintf(proc, "/proc/%d/regs", tcp->pid);
1327	tcp->pfd_reg = open(proc, O_RDONLY);
1328	if (tcp->pfd_reg < 0) {
1329		perror("strace: open(\"/proc/.../regs\", ...)");
1330		return -1;
1331	}
1332	if (cflag) {
1333		sprintf(proc, "/proc/%d/status", tcp->pid);
1334		tcp->pfd_status = open(proc, O_RDONLY);
1335		if (tcp->pfd_status < 0) {
1336			perror("strace: open(\"/proc/.../status\", ...)");
1337			return -1;
1338		}
1339	} else
1340		tcp->pfd_status = -1;
1341#endif /* FREEBSD */
1342	rebuild_pollv();
1343	if (!attaching) {
1344		/*
1345		 * Wait for the child to pause.  Because of a race
1346		 * condition we have to poll for the event.
1347		 */
1348		for (;;) {
1349			if (IOCTL_STATUS(tcp) < 0) {
1350				perror("strace: PIOCSTATUS");
1351				return -1;
1352			}
1353			if (tcp->status.PR_FLAGS & PR_ASLEEP)
1354				break;
1355		}
1356	}
1357#ifndef FREEBSD
1358	/* Stop the process so that we own the stop. */
1359	if (IOCTL(tcp->pfd, PIOCSTOP, (char *)NULL) < 0) {
1360		perror("strace: PIOCSTOP");
1361		return -1;
1362	}
1363#endif
1364#ifdef PIOCSET
1365	/* Set Run-on-Last-Close. */
1366	arg = PR_RLC;
1367	if (IOCTL(tcp->pfd, PIOCSET, &arg) < 0) {
1368		perror("PIOCSET PR_RLC");
1369		return -1;
1370	}
1371	/* Set or Reset Inherit-on-Fork. */
1372	arg = PR_FORK;
1373	if (IOCTL(tcp->pfd, followfork ? PIOCSET : PIOCRESET, &arg) < 0) {
1374		perror("PIOC{SET,RESET} PR_FORK");
1375		return -1;
1376	}
1377#else  /* !PIOCSET */
1378#ifndef FREEBSD
1379	if (ioctl(tcp->pfd, PIOCSRLC) < 0) {
1380		perror("PIOCSRLC");
1381		return -1;
1382	}
1383	if (ioctl(tcp->pfd, followfork ? PIOCSFORK : PIOCRFORK) < 0) {
1384		perror("PIOC{S,R}FORK");
1385		return -1;
1386	}
1387#else /* FREEBSD */
1388	/* just unset the PF_LINGER flag for the Run-on-Last-Close. */
1389	if (ioctl(tcp->pfd, PIOCGFL, &arg) < 0) {
1390	        perror("PIOCGFL");
1391		return -1;
1392	}
1393	arg &= ~PF_LINGER;
1394	if (ioctl(tcp->pfd, PIOCSFL, arg) < 0) {
1395		perror("PIOCSFL");
1396		return -1;
1397	}
1398#endif /* FREEBSD */
1399#endif /* !PIOCSET */
1400#ifndef FREEBSD
1401	/* Enable all syscall entries we care about. */
1402	premptyset(&syscalls);
1403	for (i = 1; i < MAX_QUALS; ++i) {
1404		if (i > (sizeof syscalls) * CHAR_BIT) break;
1405		if (qual_flags[i] & QUAL_TRACE) praddset(&syscalls, i);
1406	}
1407	praddset(&syscalls, SYS_execve);
1408	if (followfork) {
1409		praddset(&syscalls, SYS_fork);
1410#ifdef SYS_forkall
1411		praddset(&syscalls, SYS_forkall);
1412#endif
1413#ifdef SYS_fork1
1414		praddset(&syscalls, SYS_fork1);
1415#endif
1416#ifdef SYS_rfork1
1417		praddset(&syscalls, SYS_rfork1);
1418#endif
1419#ifdef SYS_rforkall
1420		praddset(&syscalls, SYS_rforkall);
1421#endif
1422	}
1423	if (IOCTL(tcp->pfd, PIOCSENTRY, &syscalls) < 0) {
1424		perror("PIOCSENTRY");
1425		return -1;
1426	}
1427	/* Enable the syscall exits. */
1428	if (IOCTL(tcp->pfd, PIOCSEXIT, &syscalls) < 0) {
1429		perror("PIOSEXIT");
1430		return -1;
1431	}
1432	/* Enable signals we care about. */
1433	premptyset(&signals);
1434	for (i = 1; i < MAX_QUALS; ++i) {
1435		if (i > (sizeof signals) * CHAR_BIT) break;
1436		if (qual_flags[i] & QUAL_SIGNAL) praddset(&signals, i);
1437	}
1438	if (IOCTL(tcp->pfd, PIOCSTRACE, &signals) < 0) {
1439		perror("PIOCSTRACE");
1440		return -1;
1441	}
1442	/* Enable faults we care about */
1443	premptyset(&faults);
1444	for (i = 1; i < MAX_QUALS; ++i) {
1445		if (i > (sizeof faults) * CHAR_BIT) break;
1446		if (qual_flags[i] & QUAL_FAULT) praddset(&faults, i);
1447	}
1448	if (IOCTL(tcp->pfd, PIOCSFAULT, &faults) < 0) {
1449		perror("PIOCSFAULT");
1450		return -1;
1451	}
1452#else /* FREEBSD */
1453	/* set events flags. */
1454	arg = S_SIG | S_SCE | S_SCX;
1455	if (ioctl(tcp->pfd, PIOCBIS, arg) < 0) {
1456		perror("PIOCBIS");
1457		return -1;
1458	}
1459#endif /* FREEBSD */
1460	if (!attaching) {
1461#ifdef MIPS
1462		/*
1463		 * The SGI PRSABORT doesn't work for pause() so
1464		 * we send it a caught signal to wake it up.
1465		 */
1466		kill(tcp->pid, SIGINT);
1467#else /* !MIPS */
1468#ifdef PRSABORT
1469		/* The child is in a pause(), abort it. */
1470		arg = PRSABORT;
1471		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1472			perror("PIOCRUN");
1473			return -1;
1474		}
1475#endif
1476#endif /* !MIPS*/
1477#ifdef FREEBSD
1478		/* wake up the child if it received the SIGSTOP */
1479		kill(tcp->pid, SIGCONT);
1480#endif
1481		for (;;) {
1482			/* Wait for the child to do something. */
1483			if (IOCTL_WSTOP(tcp) < 0) {
1484				perror("PIOCWSTOP");
1485				return -1;
1486			}
1487			if (tcp->status.PR_WHY == PR_SYSENTRY) {
1488				tcp->flags &= ~TCB_INSYSCALL;
1489				get_scno(tcp);
1490				if (known_scno(tcp) == SYS_execve)
1491					break;
1492			}
1493			/* Set it running: maybe execve will be next. */
1494#ifndef FREEBSD
1495			arg = 0;
1496			if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1497#else /* FREEBSD */
1498			if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0) {
1499#endif /* FREEBSD */
1500				perror("PIOCRUN");
1501				return -1;
1502			}
1503#ifdef FREEBSD
1504			/* handle the case where we "opened" the child before
1505			   it did the kill -STOP */
1506			if (tcp->status.PR_WHY == PR_SIGNALLED &&
1507			    tcp->status.PR_WHAT == SIGSTOP)
1508			        kill(tcp->pid, SIGCONT);
1509#endif
1510		}
1511#ifndef FREEBSD
1512	}
1513#else /* FREEBSD */
1514	} else {
1515		if (attaching < 2) {
1516			/* We are attaching to an already running process.
1517			 * Try to figure out the state of the process in syscalls,
1518			 * to handle the first event well.
1519			 * This is done by having a look at the "wchan" property of the
1520			 * process, which tells where it is stopped (if it is). */
1521			FILE * status;
1522			char wchan[20]; /* should be enough */
1523
1524			sprintf(proc, "/proc/%d/status", tcp->pid);
1525			status = fopen(proc, "r");
1526			if (status &&
1527			    (fscanf(status, "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d"
1528				    "%*d,%*d %*d,%*d %19s", wchan) == 1) &&
1529			    strcmp(wchan, "nochan") && strcmp(wchan, "spread") &&
1530			    strcmp(wchan, "stopevent")) {
1531				/* The process is asleep in the middle of a syscall.
1532				   Fake the syscall entry event */
1533				tcp->flags &= ~(TCB_INSYSCALL|TCB_STARTUP);
1534				tcp->status.PR_WHY = PR_SYSENTRY;
1535				trace_syscall(tcp);
1536			}
1537			if (status)
1538				fclose(status);
1539		} /* otherwise it's a fork being followed */
1540	}
1541#endif /* FREEBSD */
1542#ifndef HAVE_POLLABLE_PROCFS
1543	if (proc_poll_pipe[0] != -1)
1544		proc_poller(tcp->pfd);
1545	else if (nprocs > 1) {
1546		proc_poll_open();
1547		proc_poller(last_pfd);
1548		proc_poller(tcp->pfd);
1549	}
1550	last_pfd = tcp->pfd;
1551#endif /* !HAVE_POLLABLE_PROCFS */
1552	return 0;
1553}
1554
1555#endif /* USE_PROCFS */
1556
1557struct tcb *
1558pid2tcb(int pid)
1559{
1560	int i;
1561
1562	if (pid <= 0)
1563		return NULL;
1564
1565	for (i = 0; i < tcbtabsize; i++) {
1566		struct tcb *tcp = tcbtab[i];
1567		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1568			return tcp;
1569	}
1570
1571	return NULL;
1572}
1573
1574#ifdef USE_PROCFS
1575
1576static struct tcb *
1577first_used_tcb(void)
1578{
1579	int i;
1580	struct tcb *tcp;
1581	for (i = 0; i < tcbtabsize; i++) {
1582		tcp = tcbtab[i];
1583		if (tcp->flags & TCB_INUSE)
1584			return tcp;
1585	}
1586	return NULL;
1587}
1588
1589static struct tcb *
1590pfd2tcb(int pfd)
1591{
1592	int i;
1593
1594	for (i = 0; i < tcbtabsize; i++) {
1595		struct tcb *tcp = tcbtab[i];
1596		if (tcp->pfd != pfd)
1597			continue;
1598		if (tcp->flags & TCB_INUSE)
1599			return tcp;
1600	}
1601	return NULL;
1602}
1603
1604#endif /* USE_PROCFS */
1605
1606void
1607droptcb(struct tcb *tcp)
1608{
1609	if (tcp->pid == 0)
1610		return;
1611
1612	nprocs--;
1613	if (debug)
1614		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1615
1616	if (tcp->pfd != -1) {
1617		close(tcp->pfd);
1618		tcp->pfd = -1;
1619#ifdef FREEBSD
1620		if (tcp->pfd_reg != -1) {
1621		        close(tcp->pfd_reg);
1622		        tcp->pfd_reg = -1;
1623		}
1624		if (tcp->pfd_status != -1) {
1625			close(tcp->pfd_status);
1626			tcp->pfd_status = -1;
1627		}
1628#endif /* !FREEBSD */
1629#ifdef USE_PROCFS
1630		tcp->flags = 0; /* rebuild_pollv needs it */
1631		rebuild_pollv();
1632#endif
1633	}
1634
1635	if (outfname && followfork > 1 && tcp->outf)
1636		fclose(tcp->outf);
1637
1638	memset(tcp, 0, sizeof(*tcp));
1639}
1640
/*
 * Detach from the traced process described by `tcp', continuing it with
 * signal `sig', then report the detach (unless qflag is set) and drop
 * the TCB.
 *
 * Never call DETACH twice on the same process as both unattached and
 * attached-unstopped processes give the same ESRCH.  For unattached process we
 * would SIGSTOP it and wait for its SIGSTOP notification forever.
 *
 * Returns the value of `error': 0 unless a ptrace/ptrace_restart call
 * on the platform-specific paths below stored a failure result in it.
 */

static int
detach(struct tcb *tcp, int sig)
{
	int error = 0;
#ifdef LINUX
	int status, catch_sigstop;
#endif

	/* Remove a pending breakpoint before letting the process go. */
	if (tcp->flags & TCB_BPTSET)
		clearbpt(tcp);

#ifdef LINUX
	/*
	 * Linux wrongly insists the child be stopped
	 * before detaching.  Arghh.  We go through hoops
	 * to make a clean break of things.
	 */
#if defined(SPARC)
#undef PTRACE_DETACH
#define PTRACE_DETACH PTRACE_SUNDETACH
#endif
	/*
	 * On TCB_STARTUP we did PTRACE_ATTACH but still did not get the
	 * expected SIGSTOP.  We must catch exactly one as otherwise the
	 * detached process would be left stopped (process state T).
	 */
	catch_sigstop = (tcp->flags & TCB_STARTUP);
	error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, sig);
	if (error == 0) {
		/* On a clear day, you can see forever. */
	}
	else if (errno != ESRCH) {
		/* Shouldn't happen. */
		perror("detach: ptrace(PTRACE_DETACH, ...)");
	}
	/* ESRCH: probe with signal 0 to see whether the process still exists. */
	else if (my_tkill(tcp->pid, 0) < 0) {
		if (errno != ESRCH)
			perror("detach: checking sanity");
	}
	/* It exists but is not stopped: send SIGSTOP unless one is already expected. */
	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
		if (errno != ESRCH)
			perror("detach: stopping child");
	}
	else
		catch_sigstop = 1;
	if (catch_sigstop) {
		/*
		 * Wait for stops of this process, passing other signals
		 * through, until the SIGSTOP arrives or the process is gone.
		 */
		for (;;) {
#ifdef __WALL
			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
				if (errno == ECHILD) /* Already gone.  */
					break;
				if (errno != EINVAL) {
					perror("detach: waiting");
					break;
				}
#endif /* __WALL */
				/* No __WALL here.  */
				if (waitpid(tcp->pid, &status, 0) < 0) {
					if (errno != ECHILD) {
						perror("detach: waiting");
						break;
					}
#ifdef __WCLONE
					/* If no processes, try clones.  */
					if (wait4(tcp->pid, &status, __WCLONE,
						  NULL) < 0) {
						if (errno != ECHILD)
							perror("detach: waiting");
						break;
					}
#endif /* __WCLONE */
				}
#ifdef __WALL
			}
#endif
			if (!WIFSTOPPED(status)) {
				/* Au revoir, mon ami. */
				break;
			}
			if (WSTOPSIG(status) == SIGSTOP) {
				/* This is the stop we were waiting for: detach for real. */
				ptrace_restart(PTRACE_DETACH, tcp, sig);
				break;
			}
			/*
			 * Some other stop: resume the process, forwarding the
			 * signal unless it is the syscall trap signal.
			 */
			error = ptrace_restart(PTRACE_CONT, tcp,
					WSTOPSIG(status) == syscall_trap_sig ? 0
					: WSTOPSIG(status));
			if (error < 0)
				break;
		}
	}
#endif /* LINUX */

#if defined(SUNOS4)
	/* PTRACE_DETACH won't respect `sig' argument, so we post it here. */
	if (sig && kill(tcp->pid, sig) < 0)
		perror("detach: kill");
	sig = 0;
	error = ptrace_restart(PTRACE_DETACH, tcp, sig);
#endif /* SUNOS4 */

	if (!qflag)
		fprintf(stderr, "Process %u detached\n", tcp->pid);

	droptcb(tcp);

	return error;
}
1753
1754#ifdef USE_PROCFS
1755
/*
 * SIGCHLD handler: reap every child that has already terminated,
 * without blocking.
 *
 * waitpid() sets errno (e.g. ECHILD once nothing is left to reap), so
 * errno is saved and restored to keep the handler from corrupting the
 * errno value seen by whatever code was interrupted.
 */
static void
reaper(int sig)
{
	int saved_errno = errno;
	int status;

	(void) sig;
	while (waitpid(-1, &status, WNOHANG) > 0)
		continue;
	errno = saved_errno;
}
1764
1765#endif /* USE_PROCFS */
1766
1767static void
1768cleanup(void)
1769{
1770	int i;
1771	struct tcb *tcp;
1772
1773	for (i = 0; i < tcbtabsize; i++) {
1774		tcp = tcbtab[i];
1775		if (!(tcp->flags & TCB_INUSE))
1776			continue;
1777		if (debug)
1778			fprintf(stderr,
1779				"cleanup: looking at pid %u\n", tcp->pid);
1780		if (tcp_last &&
1781		    (!outfname || followfork < 2 || tcp_last == tcp)) {
1782			tprintf(" <unfinished ...>");
1783			printtrailer();
1784		}
1785		if (tcp->flags & TCB_ATTACHED)
1786			detach(tcp, 0);
1787		else {
1788			kill(tcp->pid, SIGCONT);
1789			kill(tcp->pid, SIGTERM);
1790		}
1791	}
1792	if (cflag)
1793		call_summary(outf);
1794}
1795
1796static void
1797interrupt(int sig)
1798{
1799	interrupted = 1;
1800}
1801
#ifndef HAVE_STRERROR

#if !HAVE_DECL_SYS_ERRLIST
extern int sys_nerr;
extern char *sys_errlist[];
#endif /* !HAVE_DECL_SYS_ERRLIST */

/*
 * Fallback strerror(): return the sys_errlist entry for `err_no'.
 * Numbers below 1 or not below sys_nerr yield "Unknown error N",
 * formatted into a static buffer that the next such call overwrites.
 */
const char *
strerror(int err_no)
{
	static char unknown[64];

	if (err_no >= 1 && err_no < sys_nerr)
		return sys_errlist[err_no];

	sprintf(unknown, "Unknown error %d", err_no);
	return unknown;
}

#endif /* !HAVE_STRERROR */
1822
1823#ifndef HAVE_STRSIGNAL
1824
1825#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
1826extern char *sys_siglist[];
1827#endif
1828#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
1829extern char *_sys_siglist[];
1830#endif
1831
1832const char *
1833strsignal(int sig)
1834{
1835	static char buf[64];
1836
1837	if (sig < 1 || sig >= NSIG) {
1838		sprintf(buf, "Unknown signal %d", sig);
1839		return buf;
1840	}
1841#ifdef HAVE__SYS_SIGLIST
1842	return _sys_siglist[sig];
1843#else
1844	return sys_siglist[sig];
1845#endif
1846}
1847
1848#endif /* HAVE_STRSIGNAL */
1849
1850#ifdef USE_PROCFS
1851
1852static void
1853rebuild_pollv(void)
1854{
1855	int i, j;
1856
1857	if (pollv != NULL)
1858		free(pollv);
1859	pollv = (struct pollfd *) malloc(nprocs * sizeof pollv[0]);
1860	if (pollv == NULL) {
1861		error_msg_and_die("Out of memory");
1862	}
1863
1864	for (i = j = 0; i < tcbtabsize; i++) {
1865		struct tcb *tcp = tcbtab[i];
1866		if (!(tcp->flags & TCB_INUSE))
1867			continue;
1868		pollv[j].fd = tcp->pfd;
1869		pollv[j].events = POLLWANT;
1870		j++;
1871	}
1872	if (j != nprocs) {
1873		error_msg_and_die("proc miscount");
1874	}
1875}
1876
1877#ifndef HAVE_POLLABLE_PROCFS
1878
1879static void
1880proc_poll_open(void)
1881{
1882	int i;
1883
1884	if (pipe(proc_poll_pipe) < 0) {
1885		perror_msg_and_die("pipe");
1886	}
1887	for (i = 0; i < 2; i++) {
1888		set_cloexec_flag(proc_poll_pipe[i]);
1889	}
1890}
1891
1892static int
1893proc_poll(struct pollfd *pollv, int nfds, int timeout)
1894{
1895	int i;
1896	int n;
1897	struct proc_pollfd pollinfo;
1898
1899	n = read(proc_poll_pipe[0], &pollinfo, sizeof(pollinfo));
1900	if (n < 0)
1901		return n;
1902	if (n != sizeof(struct proc_pollfd)) {
1903		error_msg_and_die("panic: short read: %d", n);
1904	}
1905	for (i = 0; i < nprocs; i++) {
1906		if (pollv[i].fd == pollinfo.fd)
1907			pollv[i].revents = pollinfo.revents;
1908		else
1909			pollv[i].revents = 0;
1910	}
1911	poller_pid = pollinfo.pid;
1912	return 1;
1913}
1914
/*
 * Deliberately empty signal handler.  proc_poller() installs it for
 * SIGUSR1 so that the signal ends its sigsuspend() instead of being
 * ignored or terminating the poller.
 */
static void
wakeup_handler(int sig)
{
	(void) sig;
}
1919
1920static void
1921proc_poller(int pfd)
1922{
1923	struct proc_pollfd pollinfo;
1924	struct sigaction sa;
1925	sigset_t blocked_set, empty_set;
1926	int i;
1927	int n;
1928	struct rlimit rl;
1929#ifdef FREEBSD
1930	struct procfs_status pfs;
1931#endif /* FREEBSD */
1932
1933	switch (fork()) {
1934	case -1:
1935		perror_msg_and_die("fork");
1936	case 0:
1937		break;
1938	default:
1939		return;
1940	}
1941
1942	sa.sa_handler = interactive ? SIG_DFL : SIG_IGN;
1943	sa.sa_flags = 0;
1944	sigemptyset(&sa.sa_mask);
1945	sigaction(SIGHUP, &sa, NULL);
1946	sigaction(SIGINT, &sa, NULL);
1947	sigaction(SIGQUIT, &sa, NULL);
1948	sigaction(SIGPIPE, &sa, NULL);
1949	sigaction(SIGTERM, &sa, NULL);
1950	sa.sa_handler = wakeup_handler;
1951	sigaction(SIGUSR1, &sa, NULL);
1952	sigemptyset(&blocked_set);
1953	sigaddset(&blocked_set, SIGUSR1);
1954	sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1955	sigemptyset(&empty_set);
1956
1957	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
1958		perror_msg_and_die("getrlimit(RLIMIT_NOFILE, ...)");
1959	}
1960	n = rl.rlim_cur;
1961	for (i = 0; i < n; i++) {
1962		if (i != pfd && i != proc_poll_pipe[1])
1963			close(i);
1964	}
1965
1966	pollinfo.fd = pfd;
1967	pollinfo.pid = getpid();
1968	for (;;) {
1969#ifndef FREEBSD
1970		if (ioctl(pfd, PIOCWSTOP, NULL) < 0)
1971#else
1972		if (ioctl(pfd, PIOCWSTOP, &pfs) < 0)
1973#endif
1974		{
1975			switch (errno) {
1976			case EINTR:
1977				continue;
1978			case EBADF:
1979				pollinfo.revents = POLLERR;
1980				break;
1981			case ENOENT:
1982				pollinfo.revents = POLLHUP;
1983				break;
1984			default:
1985				perror("proc_poller: PIOCWSTOP");
1986			}
1987			write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
1988			_exit(0);
1989		}
1990		pollinfo.revents = POLLWANT;
1991		write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
1992		sigsuspend(&empty_set);
1993	}
1994}
1995
1996#endif /* !HAVE_POLLABLE_PROCFS */
1997
1998static int
1999choose_pfd()
2000{
2001	int i, j;
2002	struct tcb *tcp;
2003
2004	static int last;
2005
2006	if (followfork < 2 &&
2007	    last < nprocs && (pollv[last].revents & POLLWANT)) {
2008		/*
2009		 * The previous process is ready to run again.  We'll
2010		 * let it do so if it is currently in a syscall.  This
2011		 * heuristic improves the readability of the trace.
2012		 */
2013		tcp = pfd2tcb(pollv[last].fd);
2014		if (tcp && (tcp->flags & TCB_INSYSCALL))
2015			return pollv[last].fd;
2016	}
2017
2018	for (i = 0; i < nprocs; i++) {
2019		/* Let competing children run round robin. */
2020		j = (i + last + 1) % nprocs;
2021		if (pollv[j].revents & (POLLHUP | POLLERR)) {
2022			tcp = pfd2tcb(pollv[j].fd);
2023			if (!tcp) {
2024				error_msg_and_die("lost proc");
2025			}
2026			droptcb(tcp);
2027			return -1;
2028		}
2029		if (pollv[j].revents & POLLWANT) {
2030			last = j;
2031			return pollv[j].fd;
2032		}
2033	}
2034	error_msg_and_die("nothing ready");
2035}
2036
2037static int
2038trace(void)
2039{
2040#ifdef POLL_HACK
2041	struct tcb *in_syscall = NULL;
2042#endif
2043	struct tcb *tcp;
2044	int pfd;
2045	int what;
2046	int ioctl_result = 0, ioctl_errno = 0;
2047	long arg;
2048
2049	for (;;) {
2050		if (interactive)
2051			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2052
2053		if (nprocs == 0)
2054			break;
2055
2056		switch (nprocs) {
2057		case 1:
2058#ifndef HAVE_POLLABLE_PROCFS
2059			if (proc_poll_pipe[0] == -1) {
2060#endif
2061				tcp = first_used_tcb();
2062				if (!tcp)
2063					continue;
2064				pfd = tcp->pfd;
2065				if (pfd == -1)
2066					continue;
2067				break;
2068#ifndef HAVE_POLLABLE_PROCFS
2069			}
2070			/* fall through ... */
2071#endif /* !HAVE_POLLABLE_PROCFS */
2072		default:
2073#ifdef HAVE_POLLABLE_PROCFS
2074#ifdef POLL_HACK
2075		        /* On some systems (e.g. UnixWare) we get too much ugly
2076			   "unfinished..." stuff when multiple proceses are in
2077			   syscalls.  Here's a nasty hack */
2078
2079			if (in_syscall) {
2080				struct pollfd pv;
2081				tcp = in_syscall;
2082				in_syscall = NULL;
2083				pv.fd = tcp->pfd;
2084				pv.events = POLLWANT;
2085				what = poll(&pv, 1, 1);
2086				if (what < 0) {
2087					if (interrupted)
2088						return 0;
2089					continue;
2090				}
2091				else if (what == 1 && pv.revents & POLLWANT) {
2092					goto FOUND;
2093				}
2094			}
2095#endif
2096
2097			if (poll(pollv, nprocs, INFTIM) < 0) {
2098				if (interrupted)
2099					return 0;
2100				continue;
2101			}
2102#else /* !HAVE_POLLABLE_PROCFS */
2103			if (proc_poll(pollv, nprocs, INFTIM) < 0) {
2104				if (interrupted)
2105					return 0;
2106				continue;
2107			}
2108#endif /* !HAVE_POLLABLE_PROCFS */
2109			pfd = choose_pfd();
2110			if (pfd == -1)
2111				continue;
2112			break;
2113		}
2114
2115		/* Look up `pfd' in our table. */
2116		tcp = pfd2tcb(pfd);
2117		if (tcp == NULL) {
2118			error_msg_and_die("unknown pfd: %u", pfd);
2119		}
2120#ifdef POLL_HACK
2121	FOUND:
2122#endif
2123		/* Get the status of the process. */
2124		if (!interrupted) {
2125#ifndef FREEBSD
2126			ioctl_result = IOCTL_WSTOP(tcp);
2127#else /* FREEBSD */
2128			/* Thanks to some scheduling mystery, the first poller
2129			   sometimes waits for the already processed end of fork
2130			   event. Doing a non blocking poll here solves the problem. */
2131			if (proc_poll_pipe[0] != -1)
2132				ioctl_result = IOCTL_STATUS(tcp);
2133			else
2134				ioctl_result = IOCTL_WSTOP(tcp);
2135#endif /* FREEBSD */
2136			ioctl_errno = errno;
2137#ifndef HAVE_POLLABLE_PROCFS
2138			if (proc_poll_pipe[0] != -1) {
2139				if (ioctl_result < 0)
2140					kill(poller_pid, SIGKILL);
2141				else
2142					kill(poller_pid, SIGUSR1);
2143			}
2144#endif /* !HAVE_POLLABLE_PROCFS */
2145		}
2146		if (interrupted)
2147			return 0;
2148
2149		if (interactive)
2150			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2151
2152		if (ioctl_result < 0) {
2153			/* Find out what happened if it failed. */
2154			switch (ioctl_errno) {
2155			case EINTR:
2156			case EBADF:
2157				continue;
2158#ifdef FREEBSD
2159			case ENOTTY:
2160#endif
2161			case ENOENT:
2162				droptcb(tcp);
2163				continue;
2164			default:
2165				perror_msg_and_die("PIOCWSTOP");
2166			}
2167		}
2168
2169#ifdef FREEBSD
2170		if ((tcp->flags & TCB_STARTUP) && (tcp->status.PR_WHY == PR_SYSEXIT)) {
2171			/* discard first event for a syscall we never entered */
2172			IOCTL(tcp->pfd, PIOCRUN, 0);
2173			continue;
2174		}
2175#endif
2176
2177		/* clear the just started flag */
2178		tcp->flags &= ~TCB_STARTUP;
2179
2180		/* set current output file */
2181		outf = tcp->outf;
2182		curcol = tcp->curcol;
2183
2184		if (cflag) {
2185			struct timeval stime;
2186#ifdef FREEBSD
2187			char buf[1024];
2188			int len;
2189
2190			len = pread(tcp->pfd_status, buf, sizeof(buf) - 1, 0);
2191			if (len > 0) {
2192				buf[len] = '\0';
2193				sscanf(buf,
2194				       "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %*d,%*d %ld,%ld",
2195				       &stime.tv_sec, &stime.tv_usec);
2196			} else
2197				stime.tv_sec = stime.tv_usec = 0;
2198#else /* !FREEBSD */
2199			stime.tv_sec = tcp->status.pr_stime.tv_sec;
2200			stime.tv_usec = tcp->status.pr_stime.tv_nsec/1000;
2201#endif /* !FREEBSD */
2202			tv_sub(&tcp->dtime, &stime, &tcp->stime);
2203			tcp->stime = stime;
2204		}
2205		what = tcp->status.PR_WHAT;
2206		switch (tcp->status.PR_WHY) {
2207#ifndef FREEBSD
2208		case PR_REQUESTED:
2209			if (tcp->status.PR_FLAGS & PR_ASLEEP) {
2210				tcp->status.PR_WHY = PR_SYSENTRY;
2211				if (trace_syscall(tcp) < 0) {
2212					error_msg_and_die("syscall trouble");
2213				}
2214			}
2215			break;
2216#endif /* !FREEBSD */
2217		case PR_SYSENTRY:
2218#ifdef POLL_HACK
2219		        in_syscall = tcp;
2220#endif
2221		case PR_SYSEXIT:
2222			if (trace_syscall(tcp) < 0) {
2223				error_msg_and_die("syscall trouble");
2224			}
2225			break;
2226		case PR_SIGNALLED:
2227			if (cflag != CFLAG_ONLY_STATS
2228			    && (qual_flags[what] & QUAL_SIGNAL)) {
2229				printleader(tcp);
2230				tprintf("--- %s (%s) ---",
2231					signame(what), strsignal(what));
2232				printtrailer();
2233#ifdef PR_INFO
2234				if (tcp->status.PR_INFO.si_signo == what) {
2235					printleader(tcp);
2236					tprintf("    siginfo=");
2237					printsiginfo(&tcp->status.PR_INFO, 1);
2238					printtrailer();
2239				}
2240#endif
2241			}
2242			break;
2243		case PR_FAULTED:
2244			if (cflag != CFLAGS_ONLY_STATS
2245			    && (qual_flags[what] & QUAL_FAULT)) {
2246				printleader(tcp);
2247				tprintf("=== FAULT %d ===", what);
2248				printtrailer();
2249			}
2250			break;
2251#ifdef FREEBSD
2252		case 0: /* handle case we polled for nothing */
2253			continue;
2254#endif
2255		default:
2256			error_msg_and_die("odd stop %d", tcp->status.PR_WHY);
2257			break;
2258		}
2259		/* Remember current print column before continuing. */
2260		tcp->curcol = curcol;
2261		arg = 0;
2262#ifndef FREEBSD
2263		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
2264#else
2265		if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
2266#endif
2267		{
2268			perror_msg_and_die("PIOCRUN");
2269		}
2270	}
2271	return 0;
2272}
2273
2274#else /* !USE_PROCFS */
2275
2276static int
2277trace()
2278{
2279	int pid;
2280	int wait_errno;
2281	int status;
2282	struct tcb *tcp;
2283#ifdef LINUX
2284	struct rusage ru;
2285	struct rusage *rup = cflag ? &ru : NULL;
2286# ifdef __WALL
2287	static int wait4_options = __WALL;
2288# endif
2289#endif /* LINUX */
2290
2291	while (nprocs != 0) {
2292		if (interrupted)
2293			return 0;
2294		if (interactive)
2295			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2296#ifdef LINUX
2297# ifdef __WALL
2298		pid = wait4(-1, &status, wait4_options, rup);
2299		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
2300			/* this kernel does not support __WALL */
2301			wait4_options &= ~__WALL;
2302			pid = wait4(-1, &status, wait4_options, rup);
2303		}
2304		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
2305			/* most likely a "cloned" process */
2306			pid = wait4(-1, &status, __WCLONE, rup);
2307			if (pid < 0) {
2308				perror_msg("wait4(__WCLONE) failed");
2309			}
2310		}
2311# else
2312		pid = wait4(-1, &status, 0, rup);
2313# endif /* __WALL */
2314#endif /* LINUX */
2315#ifdef SUNOS4
2316		pid = wait(&status);
2317#endif
2318		wait_errno = errno;
2319		if (interactive)
2320			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2321
2322		if (pid < 0) {
2323			switch (wait_errno) {
2324			case EINTR:
2325				continue;
2326			case ECHILD:
2327				/*
2328				 * We would like to verify this case
2329				 * but sometimes a race in Solbourne's
2330				 * version of SunOS sometimes reports
2331				 * ECHILD before sending us SIGCHILD.
2332				 */
2333				return 0;
2334			default:
2335				errno = wait_errno;
2336				perror("strace: wait");
2337				return -1;
2338			}
2339		}
2340		if (pid == popen_pid) {
2341			if (WIFEXITED(status) || WIFSIGNALED(status))
2342				popen_pid = 0;
2343			continue;
2344		}
2345		if (debug) {
2346			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
2347#ifdef LINUX
2348			unsigned ev = (unsigned)status >> 16;
2349			if (ev) {
2350				static const char *const event_names[] = {
2351					[PTRACE_EVENT_CLONE] = "CLONE",
2352					[PTRACE_EVENT_FORK]  = "FORK",
2353					[PTRACE_EVENT_VFORK] = "VFORK",
2354					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
2355					[PTRACE_EVENT_EXEC]  = "EXEC",
2356					[PTRACE_EVENT_EXIT]  = "EXIT",
2357				};
2358				const char *e;
2359				if (ev < ARRAY_SIZE(event_names))
2360					e = event_names[ev];
2361				else {
2362					sprintf(buf, "?? (%u)", ev);
2363					e = buf;
2364				}
2365				fprintf(stderr, " PTRACE_EVENT_%s", e);
2366			}
2367#endif
2368			strcpy(buf, "???");
2369			if (WIFSIGNALED(status))
2370#ifdef WCOREDUMP
2371				sprintf(buf, "WIFSIGNALED,%ssig=%s",
2372						WCOREDUMP(status) ? "core," : "",
2373						signame(WTERMSIG(status)));
2374#else
2375				sprintf(buf, "WIFSIGNALED,sig=%s",
2376						signame(WTERMSIG(status)));
2377#endif
2378			if (WIFEXITED(status))
2379				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
2380			if (WIFSTOPPED(status))
2381				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
2382#ifdef WIFCONTINUED
2383			if (WIFCONTINUED(status))
2384				strcpy(buf, "WIFCONTINUED");
2385#endif
2386			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
2387		}
2388
2389		/* Look up `pid' in our table. */
2390		tcp = pid2tcb(pid);
2391		if (tcp == NULL) {
2392#ifdef LINUX
2393			if (followfork) {
2394				/* This is needed to go with the CLONE_PTRACE
2395				   changes in process.c/util.c: we might see
2396				   the child's initial trap before we see the
2397				   parent return from the clone syscall.
2398				   Leave the child suspended until the parent
2399				   returns from its system call.  Only then
2400				   will we have the association of parent and
2401				   child so that we know how to do clearbpt
2402				   in the child.  */
2403				tcp = alloctcb(pid);
2404				tcp->flags |= TCB_ATTACHED;
2405				if (!qflag)
2406					fprintf(stderr, "Process %d attached\n",
2407						pid);
2408			}
2409			else
2410				/* This can happen if a clone call used
2411				   CLONE_PTRACE itself.  */
2412#endif
2413			{
2414				if (WIFSTOPPED(status))
2415					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
2416				error_msg_and_die("Unknown pid: %u", pid);
2417			}
2418		}
2419		/* set current output file */
2420		outf = tcp->outf;
2421		curcol = tcp->curcol;
2422#ifdef LINUX
2423		if (cflag) {
2424			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2425			tcp->stime = ru.ru_stime;
2426		}
2427#endif
2428
2429		if (WIFSIGNALED(status)) {
2430			if (pid == strace_child)
2431				exit_code = 0x100 | WTERMSIG(status);
2432			if (cflag != CFLAG_ONLY_STATS
2433			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2434				printleader(tcp);
2435#ifdef WCOREDUMP
2436				tprintf("+++ killed by %s %s+++",
2437					signame(WTERMSIG(status)),
2438					WCOREDUMP(status) ? "(core dumped) " : "");
2439#else
2440				tprintf("+++ killed by %s +++",
2441					signame(WTERMSIG(status)));
2442#endif
2443				printtrailer();
2444			}
2445			droptcb(tcp);
2446			continue;
2447		}
2448		if (WIFEXITED(status)) {
2449			if (pid == strace_child)
2450				exit_code = WEXITSTATUS(status);
2451			if (tcp == tcp_last) {
2452				if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
2453					tprintf(" <unfinished ... exit status %d>\n",
2454						WEXITSTATUS(status));
2455				tcp_last = NULL;
2456			}
2457			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
2458				printleader(tcp);
2459				tprintf("+++ exited with %d +++", WEXITSTATUS(status));
2460				printtrailer();
2461			}
2462			droptcb(tcp);
2463			continue;
2464		}
2465		if (!WIFSTOPPED(status)) {
2466			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2467			droptcb(tcp);
2468			continue;
2469		}
2470
2471		if (status >> 16) {
2472			/* Ptrace event (we ignore all of them for now) */
2473			goto tracing;
2474		}
2475
2476		/*
2477		 * Interestingly, the process may stop
2478		 * with STOPSIG equal to some other signal
2479		 * than SIGSTOP if we happened to attach
2480		 * just before the process takes a signal.
2481		 * A no-MMU vforked child won't send up a signal,
2482		 * so skip the first (lost) execve notification.
2483		 */
2484		if ((tcp->flags & TCB_STARTUP) &&
2485		    (WSTOPSIG(status) == SIGSTOP || strace_vforked)) {
2486			/*
2487			 * This flag is there to keep us in sync.
2488			 * Next time this process stops it should
2489			 * really be entering a system call.
2490			 */
2491			tcp->flags &= ~TCB_STARTUP;
2492			if (tcp->flags & TCB_BPTSET) {
2493				/*
2494				 * One example is a breakpoint inherited from
2495				 * parent through fork ().
2496				 */
2497				if (clearbpt(tcp) < 0) /* Pretty fatal */ {
2498					droptcb(tcp);
2499					cleanup();
2500					return -1;
2501				}
2502			}
2503#ifdef LINUX
2504			if (ptrace_setoptions) {
2505				if (debug)
2506					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2507				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2508					if (errno != ESRCH) {
2509						/* Should never happen, really */
2510						perror_msg_and_die("PTRACE_SETOPTIONS");
2511					}
2512				}
2513			}
2514#endif
2515			goto tracing;
2516		}
2517
2518		if (WSTOPSIG(status) != syscall_trap_sig) {
2519			if (WSTOPSIG(status) == SIGSTOP &&
2520					(tcp->flags & TCB_SIGTRAPPED)) {
2521				/*
2522				 * Trapped attempt to block SIGTRAP
2523				 * Hope we are back in control now.
2524				 */
2525				tcp->flags &= ~(TCB_INSYSCALL | TCB_SIGTRAPPED);
2526				if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2527					cleanup();
2528					return -1;
2529				}
2530				continue;
2531			}
2532			if (cflag != CFLAG_ONLY_STATS
2533			    && (qual_flags[WSTOPSIG(status)] & QUAL_SIGNAL)) {
2534				siginfo_t si;
2535#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2536				long pc = 0;
2537				long psr = 0;
2538
2539				upeek(tcp, PT_CR_IPSR, &psr);
2540				upeek(tcp, PT_CR_IIP, &pc);
2541
2542# define PSR_RI	41
2543				pc += (psr >> PSR_RI) & 0x3;
2544# define PC_FORMAT_STR	" @ %lx"
2545# define PC_FORMAT_ARG	pc
2546#else
2547# define PC_FORMAT_STR	"%s"
2548# define PC_FORMAT_ARG	""
2549#endif
2550				printleader(tcp);
2551				if (ptrace(PTRACE_GETSIGINFO, pid, 0, &si) == 0) {
2552					tprintf("--- ");
2553					printsiginfo(&si, verbose(tcp));
2554					tprintf(" (%s)" PC_FORMAT_STR " ---",
2555						strsignal(WSTOPSIG(status)),
2556						PC_FORMAT_ARG);
2557				} else
2558					tprintf("--- %s by %s" PC_FORMAT_STR " ---",
2559						strsignal(WSTOPSIG(status)),
2560						signame(WSTOPSIG(status)),
2561						PC_FORMAT_ARG);
2562				printtrailer();
2563			}
2564			if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
2565				cleanup();
2566				return -1;
2567			}
2568			continue;
2569		}
2570		/* we handled the STATUS, we are permitted to interrupt now. */
2571		if (interrupted)
2572			return 0;
2573		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2574			/* ptrace() failed in trace_syscall() with ESRCH.
2575			 * Likely a result of process disappearing mid-flight.
2576			 * Observed case: exit_group() terminating
2577			 * all processes in thread group. In this case, threads
2578			 * "disappear" in an unpredictable moment without any
2579			 * notification to strace via wait().
2580			 */
2581			if (tcp->flags & TCB_ATTACHED) {
2582				if (tcp_last) {
2583					/* Do we have dangling line "syscall(param, param"?
2584					 * Finish the line then.
2585					 */
2586					tcp_last->flags |= TCB_REPRINT;
2587					tprintf(" <unfinished ...>");
2588					printtrailer();
2589				}
2590				detach(tcp, 0);
2591			} else {
2592				ptrace(PTRACE_KILL,
2593					tcp->pid, (char *) 1, SIGTERM);
2594				droptcb(tcp);
2595			}
2596			continue;
2597		}
2598	tracing:
2599		/* Remember current print column before continuing. */
2600		tcp->curcol = curcol;
2601		if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2602			cleanup();
2603			return -1;
2604		}
2605	}
2606	return 0;
2607}
2608
2609#endif /* !USE_PROCFS */
2610
2611void
2612tprintf(const char *fmt, ...)
2613{
2614	va_list args;
2615
2616	va_start(args, fmt);
2617	if (outf) {
2618		int n = vfprintf(outf, fmt, args);
2619		if (n < 0) {
2620			if (outf != stderr)
2621				perror(outfname == NULL
2622				       ? "<writing to pipe>" : outfname);
2623		} else
2624			curcol += n;
2625	}
2626	va_end(args);
2627	return;
2628}
2629
2630void
2631printleader(struct tcb *tcp)
2632{
2633	if (tcp_last) {
2634		if (tcp_last->ptrace_errno) {
2635			if (tcp_last->flags & TCB_INSYSCALL) {
2636				tprintf(" <unavailable>) ");
2637				tabto(acolumn);
2638			}
2639			tprintf("= ? <unavailable>\n");
2640			tcp_last->ptrace_errno = 0;
2641		} else if (!outfname || followfork < 2 || tcp_last == tcp) {
2642			tcp_last->flags |= TCB_REPRINT;
2643			tprintf(" <unfinished ...>\n");
2644		}
2645	}
2646	curcol = 0;
2647	if ((followfork == 1 || pflag_seen > 1) && outfname)
2648		tprintf("%-5d ", tcp->pid);
2649	else if (nprocs > 1 && !outfname)
2650		tprintf("[pid %5u] ", tcp->pid);
2651	if (tflag) {
2652		char str[sizeof("HH:MM:SS")];
2653		struct timeval tv, dtv;
2654		static struct timeval otv;
2655
2656		gettimeofday(&tv, NULL);
2657		if (rflag) {
2658			if (otv.tv_sec == 0)
2659				otv = tv;
2660			tv_sub(&dtv, &tv, &otv);
2661			tprintf("%6ld.%06ld ",
2662				(long) dtv.tv_sec, (long) dtv.tv_usec);
2663			otv = tv;
2664		}
2665		else if (tflag > 2) {
2666			tprintf("%ld.%06ld ",
2667				(long) tv.tv_sec, (long) tv.tv_usec);
2668		}
2669		else {
2670			time_t local = tv.tv_sec;
2671			strftime(str, sizeof(str), "%T", localtime(&local));
2672			if (tflag > 1)
2673				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
2674			else
2675				tprintf("%s ", str);
2676		}
2677	}
2678	if (iflag)
2679		printcall(tcp);
2680}
2681
2682void
2683tabto(int col)
2684{
2685	if (curcol < col)
2686		tprintf("%*s", col - curcol, "");
2687}
2688
2689void
2690printtrailer(void)
2691{
2692	tprintf("\n");
2693	tcp_last = NULL;
2694}
2695
2696#ifdef HAVE_MP_PROCFS
2697
/*
 * Write a command word, optionally followed by an argument buffer,
 * to `fd' with a single writev() call.
 *
 * fd:   descriptor to write to.
 * cmd:  command value; its sizeof(int) bytes are always written first.
 * arg:  optional argument buffer; when NULL only `cmd' is written.
 * size: length of `arg' in bytes (ignored when `arg' is NULL).
 *
 * Returns whatever writev() returns, converted to int.
 */
int
mp_ioctl(int fd, int cmd, void *arg, int size)
{
	struct iovec vec[2];
	int count;

	vec[0].iov_base = &cmd;
	vec[0].iov_len = sizeof(cmd);
	count = 1;

	if (arg != NULL) {
		vec[1].iov_base = arg;
		vec[1].iov_len = size;
		count = 2;
	}

	return writev(fd, vec, count);
}
2714
2715#endif
2716