strace.c revision 5940e6593911dcace424c668a1c0934c71fccb9e
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <dirent.h>
48
49#ifdef LINUX
50# include <asm/unistd.h>
51# if defined __NR_tkill
52#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
53# else
   /* kill() may arbitrarily choose the target task of the process group,
      while we later wait on that specific TID.  PID process waits become
      TID task specific waits for a process under ptrace(2).  */
57#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
58#  define my_tkill(tid, sig) kill((tid), (sig))
59# endif
60#endif
61
62#if defined(IA64) && defined(LINUX)
63# include <asm/ptrace_offsets.h>
64#endif
65
66#ifdef USE_PROCFS
67#include <poll.h>
68#endif
69
70#ifdef SVR4
71#include <sys/stropts.h>
72#ifdef HAVE_MP_PROCFS
73#ifdef HAVE_SYS_UIO_H
74#include <sys/uio.h>
75#endif
76#endif
77#endif
78extern char **environ;
79extern int optind;
80extern char *optarg;
81
82
/* debug: bumped by each -d; followfork: bumped by each -f (or taken from -F if no -f given) */
int debug = 0, followfork = 0;
/* ptrace options that the startup self-tests found to be usable (OR-ed in by those tests) */
unsigned int ptrace_setoptions = 0;
/* Which WSTOPSIG(status) value marks syscall traps? */
static unsigned int syscall_trap_sig = SIGTRAP;
/* dtime: bumped by -T; xflag: bumped by -x; qflag: bumped by -q (forced to 1 with -o plus a command) */
int dtime = 0, xflag = 0, qflag = 0;
/* -c selects CFLAG_ONLY_STATS, -C selects CFLAG_BOTH; the two are mutually exclusive */
cflag_t cflag = CFLAG_NONE;
/*
 * iflag: bumped by -i; interactive: 1 by default, cleared when -o is used
 * together with a command; pflag_seen: number of accepted -p options;
 * rflag: bumped by -r; tflag: bumped by -t and also by -r.
 */
static int iflag = 0, interactive = 0, pflag_seen = 0, rflag = 0, tflag = 0;
90/*
91 * daemonized_tracer supports -D option.
92 * With this option, strace forks twice.
93 * Unlike normal case, with -D *grandparent* process exec's,
94 * becoming a traced process. Child exits (this prevents traced process
95 * from having children it doesn't expect to have), and grandchild
96 * attaches to grandparent similarly to strace -p PID.
97 * This allows for more transparent interaction in cases
98 * when process and its parent are communicating via signals,
99 * wait() etc. Without -D, strace process gets lodged in between,
100 * disrupting parent<->child link.
101 */
102static bool daemonized_tracer = 0;
103
104/* Sometimes we want to print only succeeding syscalls. */
105int not_failing_only = 0;
106
107/* Show path associated with fd arguments */
108int show_fd_path = 0;
109
110/* are we filtering traces based on paths? */
111int tracing_paths = 0;
112
/* NOTE(review): exit_code is not written in the visible part of the file; presumably strace's own exit status -- confirm */
static int exit_code = 0;
/* Result of the fork() performed in startup_child() */
static int strace_child = 0;
/* Pid of the process acting as tracer; refreshed after forks that change which process that is */
static int strace_tracer_pid = 0;

/* Argument of -u, or NULL when -u was not given */
static char *username = NULL;
/* uid/gid the command runs as: taken from the -u user's passwd entry, else getuid()/getgid() */
static uid_t run_uid;
static gid_t run_gid;

/* Argument of -s */
int max_strlen = DEFAULT_STRLEN;
/* Argument of -a */
static int acolumn = DEFAULT_ACOLUMN;
/* NUL-terminated string of exactly acolumn spaces, built in main() */
static char *acolumn_spaces;
/* Argument of -o, or NULL; a leading '|' or '!' means "pipe to command" */
static char *outfname = NULL;
/* Trace output stream; stderr unless redirected with -o */
static FILE *outf;
/* NOTE(review): presumably the current output column; not used in the visible part -- confirm */
static int curcol;
/* Table of tcb pointers, tcbtabsize entries, allocated in main() */
static struct tcb **tcbtab;
/* NOTE(review): nprocs is not used in the visible part; presumably the number of tcbs in use -- confirm */
static unsigned int nprocs, tcbtabsize;
/* argv[0], or "strace" when argv[0] is NULL; prefixes every error message */
static const char *progname;
130
131static int detach(struct tcb *tcp, int sig);
132static int trace(void);
133static void cleanup(void);
134static void interrupt(int sig);
135static sigset_t empty_set, blocked_set;
136
137#ifdef HAVE_SIG_ATOMIC_T
138static volatile sig_atomic_t interrupted;
139#else /* !HAVE_SIG_ATOMIC_T */
140static volatile int interrupted;
141#endif /* !HAVE_SIG_ATOMIC_T */
142
143#ifdef USE_PROCFS
144
145static struct tcb *pfd2tcb(int pfd);
146static void reaper(int sig);
147static void rebuild_pollv(void);
148static struct pollfd *pollv;
149
150#ifndef HAVE_POLLABLE_PROCFS
151
152static void proc_poll_open(void);
153static void proc_poller(int pfd);
154
/*
 * Record type used when procfs descriptors are not pollable
 * (!HAVE_POLLABLE_PROCFS).
 * NOTE(review): presumably what proc_poller() sends over proc_poll_pipe;
 * the users of this struct are outside the visible part -- confirm.
 */
struct proc_pollfd {
	int fd;
	int revents;
	int pid;
};
160
161static int poller_pid;
162static int proc_poll_pipe[2] = { -1, -1 };
163
164#endif /* !HAVE_POLLABLE_PROCFS */
165
166#ifdef HAVE_MP_PROCFS
167#define POLLWANT	POLLWRNORM
168#else
169#define POLLWANT	POLLPRI
170#endif
171#endif /* USE_PROCFS */
172
/*
 * Print the command-line synopsis and option summary to ofp, then
 * terminate the process with exit status exitval.  Does not return.
 *
 * The three format arguments fill in the documented defaults for
 * -a (DEFAULT_ACOLUMN), -s (DEFAULT_STRLEN) and -S (DEFAULT_SORTBY).
 */
static void
usage(FILE *ofp, int exitval)
{
	fprintf(ofp, "\
usage: strace [-CdDffhiqrtttTvVxxy] [-a column] [-e expr] ... [-o file]\n\
              [-p pid] ... [-s strsize] [-u username] [-E var=val] ...\n\
              [-P path] [command [arg ...]]\n\
   or: strace -c [-D] [-e expr] ... [-O overhead] [-S sortby] [-E var=val] ...\n\
              [command [arg ...]]\n\
-c -- count time, calls, and errors for each syscall and report summary\n\
-C -- like -c but also print regular output while processes are running\n\
-f -- follow forks, -ff -- with output into separate files\n\
-F -- attempt to follow vforks, -h -- print help message\n\
-i -- print instruction pointer at time of syscall\n\
-q -- suppress messages about attaching, detaching, etc.\n\
-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
-T -- print time spent in each syscall, -V -- print version\n\
-v -- verbose mode: print unabbreviated argv, stat, termio[s], etc. args\n\
-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
-y -- print paths associated with file descriptor arguments\n\
-a column -- alignment COLUMN for printing syscall results (default %d)\n\
-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
   options: trace, abbrev, verbose, raw, signal, read, or write\n\
-o file -- send trace output to FILE instead of stderr\n\
-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
-p pid -- trace process with process id PID, may be repeated\n\
-D -- run tracer process as a detached grandchild, not as parent\n\
-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
-u username -- run command as username handling setuid and/or setgid\n\
-E var=val -- put var=val in the environment for command\n\
-E var -- remove var from the environment for command\n\
-P path -- trace accesses to path\n\
" /* this is broken, so don't document it
-z -- print only succeeding syscalls\n\
  */
, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
	exit(exitval);
}
212
213static void die(void) __attribute__ ((noreturn));
214static void die(void)
215{
216	if (strace_tracer_pid == getpid()) {
217		cflag = 0;
218		cleanup();
219	}
220	exit(1);
221}
222
223static void verror_msg(int err_no, const char *fmt, va_list p)
224{
225	fflush(NULL);
226	fprintf(stderr, "%s: ", progname);
227	vfprintf(stderr, fmt, p);
228	if (err_no)
229		fprintf(stderr, ": %s\n", strerror(err_no));
230	else
231		putc('\n', stderr);
232	fflush(stderr);
233}
234
/*
 * Print a printf-style message, prefixed with the program name, to stderr.
 * No errno text is appended.
 */
void error_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	va_end(args);
}
242
/*
 * Like error_msg(), but terminates the process via die() afterwards.
 * Does not return, which is why there is no va_end() here.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	die();
}
250
/*
 * Print a printf-style message to stderr, followed by the text for the
 * value errno had on entry (when that value is non-zero).
 */
void perror_msg(const char *fmt, ...)
{
	va_list args;
	const int err = errno;

	va_start(args, fmt);
	verror_msg(err, fmt, args);
	va_end(args);
}
258
/*
 * Like perror_msg(), but terminates the process via die() afterwards.
 * Does not return, which is why there is no va_end() here.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list args;
	const int err = errno;

	va_start(args, fmt);
	verror_msg(err, fmt, args);
	die();
}
266
/*
 * Report "Out of memory" and terminate.
 *
 * A static guard protects against re-entry: if this function is reached
 * again while the first report is still in progress, the process exits
 * with status 1 immediately without printing anything.
 */
void die_out_of_memory(void)
{
	static bool already_reporting = 0;

	if (!already_reporting) {
		already_reporting = 1;
		error_msg_and_die("Out of memory");
	}
	exit(1);
}
275
276#ifdef SVR4
277#ifdef MIPS
/*
 * Deliberately empty function.  startup_child() installs it as the
 * SIGINT handler in its USE_PROCFS/MIPS path ("Kludge for SGI").
 */
void
foobar()
{
}
282#endif /* MIPS */
283#endif /* SVR4 */
284
285/* Glue for systems without a MMU that cannot provide fork() */
286#ifdef HAVE_FORK
287# define strace_vforked 0
288#else
289# define strace_vforked 1
290# define fork()         vfork()
291#endif
292
/*
 * Make sure FD_CLOEXEC is set on fd.
 *
 * A failing F_GETFD means fd is bad, which would be a bug in the caller,
 * so that case aborts via perror_msg_and_die() rather than returning an
 * error.  F_SETFD is issued only when the flag is not already set.
 */
static void
set_cloexec_flag(int fd)
{
	const int current = fcntl(fd, F_GETFD);

	if (current < 0) {
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Already close-on-exec: nothing to change. */
	if ((current & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, current | FD_CLOEXEC); /* never fails */
}
314
315/*
316 * When strace is setuid executable, we have to swap uids
317 * before and after filesystem and process management operations.
318 */
/*
 * Exchange the real and effective user ids (no-op on SVR4, and when the
 * two ids are already equal).  Dies if setreuid() fails.
 */
static void
swap_uid(void)
{
#ifndef SVR4
	int effective = geteuid();
	int real = getuid();

	if (effective == real)
		return;

	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
#endif
}
330
331#if _LFS64_LARGEFILE
332# define fopen_for_output fopen64
333#else
334# define fopen_for_output fopen
335#endif
336
/*
 * Open path for writing as an output file.
 *
 * The uids are swapped around the open (see swap_uid()), and the
 * resulting descriptor is marked close-on-exec.  Dies with a message
 * if the file cannot be opened; never returns NULL.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
350
351static int popen_pid = 0;
352
353#ifndef _PATH_BSHELL
354# define _PATH_BSHELL "/bin/sh"
355#endif
356
357/*
358 * We cannot use standard popen(3) here because we have to distinguish
359 * popen child process from other processes we trace, and standard popen(3)
360 * does not export its child's pid.
361 */
362static FILE *
363strace_popen(const char *command)
364{
365	FILE *fp;
366	int fds[2];
367
368	swap_uid();
369	if (pipe(fds) < 0)
370		perror_msg_and_die("pipe");
371
372	set_cloexec_flag(fds[1]); /* never fails */
373
374	popen_pid = vfork();
375	if (popen_pid == -1)
376		perror_msg_and_die("vfork");
377
378	if (popen_pid == 0) {
379		/* child */
380		close(fds[1]);
381		if (fds[0] != 0) {
382			if (dup2(fds[0], 0))
383				perror_msg_and_die("dup2");
384			close(fds[0]);
385		}
386		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
387		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
388	}
389
390	/* parent */
391	close(fds[0]);
392	swap_uid();
393	fp = fdopen(fds[1], "w");
394	if (!fp)
395		die_out_of_memory();
396	return fp;
397}
398
399static void
400newoutf(struct tcb *tcp)
401{
402	if (outfname && followfork > 1) {
403		char name[520 + sizeof(int) * 3];
404		sprintf(name, "%.512s.%u", outfname, tcp->pid);
405		tcp->outf = strace_fopen(name);
406	}
407}
408
409static void
410startup_attach(void)
411{
412	int tcbi;
413	struct tcb *tcp;
414
415	/*
416	 * Block user interruptions as we would leave the traced
417	 * process stopped (process state T) if we would terminate in
418	 * between PTRACE_ATTACH and wait4 () on SIGSTOP.
419	 * We rely on cleanup() from this point on.
420	 */
421	if (interactive)
422		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
423
424	if (daemonized_tracer) {
425		pid_t pid = fork();
426		if (pid < 0) {
427			_exit(1);
428		}
429		if (pid) { /* parent */
430			/*
431			 * Wait for grandchild to attach to straced process
432			 * (grandparent). Grandchild SIGKILLs us after it attached.
433			 * Grandparent's wait() is unblocked by our death,
434			 * it proceeds to exec the straced program.
435			 */
436			pause();
437			_exit(0); /* paranoia */
438		}
439		/* grandchild */
440		/* We will be the tracer process. Remember our new pid: */
441		strace_tracer_pid = getpid();
442	}
443
444	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
445		tcp = tcbtab[tcbi];
446
447		if (!(tcp->flags & TCB_INUSE) || !(tcp->flags & TCB_ATTACHED))
448			continue;
449#ifdef LINUX
450		if (tcp->flags & TCB_ATTACH_DONE)
451			continue;
452#endif
453		/* Reinitialize the output since it may have changed. */
454		tcp->outf = outf;
455		newoutf(tcp);
456
457#ifdef USE_PROCFS
458		if (proc_open(tcp, 1) < 0) {
459			fprintf(stderr, "trouble opening proc file\n");
460			droptcb(tcp);
461			continue;
462		}
463#else /* !USE_PROCFS */
464# ifdef LINUX
465		if (followfork && !daemonized_tracer) {
466			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
467			DIR *dir;
468
469			sprintf(procdir, "/proc/%d/task", tcp->pid);
470			dir = opendir(procdir);
471			if (dir != NULL) {
472				unsigned int ntid = 0, nerr = 0;
473				struct dirent *de;
474				int tid;
475				while ((de = readdir(dir)) != NULL) {
476					if (de->d_fileno == 0)
477						continue;
478					tid = atoi(de->d_name);
479					if (tid <= 0)
480						continue;
481					++ntid;
482					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0) {
483						++nerr;
484						if (debug)
485							fprintf(stderr, "attach to pid %d failed\n", tid);
486					}
487					else {
488						if (debug)
489							fprintf(stderr, "attach to pid %d succeeded\n", tid);
490						if (tid != tcp->pid) {
491							struct tcb *new_tcp = alloctcb(tid);
492							new_tcp->flags |= TCB_ATTACHED|TCB_ATTACH_DONE;
493						}
494					}
495					if (interactive) {
496						sigprocmask(SIG_SETMASK, &empty_set, NULL);
497						if (interrupted)
498							goto ret;
499						sigprocmask(SIG_BLOCK, &blocked_set, NULL);
500					}
501				}
502				closedir(dir);
503				ntid -= nerr;
504				if (ntid == 0) {
505					perror("attach: ptrace(PTRACE_ATTACH, ...)");
506					droptcb(tcp);
507					continue;
508				}
509				if (!qflag) {
510					fprintf(stderr, ntid > 1
511? "Process %u attached with %u threads - interrupt to quit\n"
512: "Process %u attached - interrupt to quit\n",
513						tcp->pid, ntid);
514				}
515				continue;
516			} /* if (opendir worked) */
517		} /* if (-f) */
518# endif /* LINUX */
519		if (ptrace(PTRACE_ATTACH, tcp->pid, (char *) 1, 0) < 0) {
520			perror("attach: ptrace(PTRACE_ATTACH, ...)");
521			droptcb(tcp);
522			continue;
523		}
524		if (debug)
525			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);
526
527		if (daemonized_tracer) {
528			/*
529			 * It is our grandparent we trace, not a -p PID.
530			 * Don't want to just detach on exit, so...
531			 */
532			tcp->flags &= ~TCB_ATTACHED;
533			/*
534			 * Make parent go away.
535			 * Also makes grandparent's wait() unblock.
536			 */
537			kill(getppid(), SIGKILL);
538		}
539
540#endif /* !USE_PROCFS */
541		if (!qflag)
542			fprintf(stderr,
543				"Process %u attached - interrupt to quit\n",
544				tcp->pid);
545	} /* for each tcbtab[] */
546
547 ret:
548#ifdef LINUX
549	/* TCB_ATTACH_DONE flag is used only in this function */
550	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
551		tcp = tcbtab[tcbi];
552		tcp->flags &= ~TCB_ATTACH_DONE;
553	}
554#endif
555
556	if (interactive)
557		sigprocmask(SIG_SETMASK, &empty_set, NULL);
558}
559
560static void
561startup_child(char **argv)
562{
563	struct stat statbuf;
564	const char *filename;
565	char pathname[MAXPATHLEN];
566	int pid = 0;
567	struct tcb *tcp;
568
569	filename = argv[0];
570	if (strchr(filename, '/')) {
571		if (strlen(filename) > sizeof pathname - 1) {
572			errno = ENAMETOOLONG;
573			perror_msg_and_die("exec");
574		}
575		strcpy(pathname, filename);
576	}
577#ifdef USE_DEBUGGING_EXEC
578	/*
579	 * Debuggers customarily check the current directory
580	 * first regardless of the path but doing that gives
581	 * security geeks a panic attack.
582	 */
583	else if (stat(filename, &statbuf) == 0)
584		strcpy(pathname, filename);
585#endif /* USE_DEBUGGING_EXEC */
586	else {
587		const char *path;
588		int m, n, len;
589
590		for (path = getenv("PATH"); path && *path; path += m) {
591			if (strchr(path, ':')) {
592				n = strchr(path, ':') - path;
593				m = n + 1;
594			}
595			else
596				m = n = strlen(path);
597			if (n == 0) {
598				if (!getcwd(pathname, MAXPATHLEN))
599					continue;
600				len = strlen(pathname);
601			}
602			else if (n > sizeof pathname - 1)
603				continue;
604			else {
605				strncpy(pathname, path, n);
606				len = n;
607			}
608			if (len && pathname[len - 1] != '/')
609				pathname[len++] = '/';
610			strcpy(pathname + len, filename);
611			if (stat(pathname, &statbuf) == 0 &&
612			    /* Accept only regular files
613			       with some execute bits set.
614			       XXX not perfect, might still fail */
615			    S_ISREG(statbuf.st_mode) &&
616			    (statbuf.st_mode & 0111))
617				break;
618		}
619	}
620	if (stat(pathname, &statbuf) < 0) {
621		perror_msg_and_die("Can't stat '%s'", filename);
622	}
623	strace_child = pid = fork();
624	if (pid < 0) {
625		perror_msg_and_die("fork");
626	}
627	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
628	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
629	) {
630		pid = getpid();
631#ifdef USE_PROCFS
632		if (outf != stderr) close(fileno(outf));
633#ifdef MIPS
634		/* Kludge for SGI, see proc_open for details. */
635		sa.sa_handler = foobar;
636		sa.sa_flags = 0;
637		sigemptyset(&sa.sa_mask);
638		sigaction(SIGINT, &sa, NULL);
639#endif /* MIPS */
640#ifndef FREEBSD
641		pause();
642#else /* FREEBSD */
643		kill(pid, SIGSTOP); /* stop HERE */
644#endif /* FREEBSD */
645#else /* !USE_PROCFS */
646		if (outf != stderr)
647			close(fileno(outf));
648
649		if (!daemonized_tracer) {
650			if (ptrace(PTRACE_TRACEME, 0, (char *) 1, 0) < 0) {
651				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
652			}
653			if (debug)
654				kill(pid, SIGSTOP);
655		}
656
657		if (username != NULL || geteuid() == 0) {
658			uid_t run_euid = run_uid;
659			gid_t run_egid = run_gid;
660
661			if (statbuf.st_mode & S_ISUID)
662				run_euid = statbuf.st_uid;
663			if (statbuf.st_mode & S_ISGID)
664				run_egid = statbuf.st_gid;
665
666			/*
667			 * It is important to set groups before we
668			 * lose privileges on setuid.
669			 */
670			if (username != NULL) {
671				if (initgroups(username, run_gid) < 0) {
672					perror_msg_and_die("initgroups");
673				}
674				if (setregid(run_gid, run_egid) < 0) {
675					perror_msg_and_die("setregid");
676				}
677				if (setreuid(run_uid, run_euid) < 0) {
678					perror_msg_and_die("setreuid");
679				}
680			}
681		}
682		else
683			setreuid(run_uid, run_uid);
684
685		if (!daemonized_tracer) {
686			/*
687			 * Induce an immediate stop so that the parent
688			 * will resume us with PTRACE_SYSCALL and display
689			 * this execve call normally.
690			 * Unless of course we're on a no-MMU system where
691			 * we vfork()-ed, so we cannot stop the child.
692			 */
693			if (!strace_vforked)
694				kill(getpid(), SIGSTOP);
695		} else {
696			struct sigaction sv_sigchld;
697			sigaction(SIGCHLD, NULL, &sv_sigchld);
698			/*
699			 * Make sure it is not SIG_IGN, otherwise wait
700			 * will not block.
701			 */
702			signal(SIGCHLD, SIG_DFL);
703			/*
704			 * Wait for grandchild to attach to us.
705			 * It kills child after that, and wait() unblocks.
706			 */
707			alarm(3);
708			wait(NULL);
709			alarm(0);
710			sigaction(SIGCHLD, &sv_sigchld, NULL);
711		}
712#endif /* !USE_PROCFS */
713
714		execv(pathname, argv);
715		perror_msg_and_die("exec");
716	}
717
718	/* We are the tracer.  */
719	/* With -D, we are *child* here, IOW: different pid. Fetch it. */
720	strace_tracer_pid = getpid();
721
722	tcp = alloctcb(daemonized_tracer ? getppid() : pid);
723	if (daemonized_tracer) {
724		/* We want subsequent startup_attach() to attach to it.  */
725		tcp->flags |= TCB_ATTACHED;
726	}
727#ifdef USE_PROCFS
728	if (proc_open(tcp, 0) < 0) {
729		perror_msg_and_die("trouble opening proc file");
730	}
731#endif /* USE_PROCFS */
732}
733
734#ifdef LINUX
/*
 * Send sig to pid, ignoring the outcome, and leave errno exactly as it
 * was on entry so that a following perror-style report still describes
 * the caller's original failure.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);

	errno = errno_on_entry;
}
742
/*
 * Test whether the kernel supports the PTRACE_O_TRACECLONE et al. options.
 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
 * and then see which options are supported by the kernel.
 *
 * The child makes itself traced, stops itself, and then forks once.
 * The test passes if the pid reported via PTRACE_GETEVENTMSG at the
 * PTRACE_EVENT_FORK stop equals the pid of a grandchild that we actually
 * get to wait() for.  On success test_options is OR-ed into
 * ptrace_setoptions; on failure a message is printed and
 * ptrace_setoptions is left unchanged.  Unexpected wait results are fatal.
 */
static void
test_ptrace_setoptions_followfork(void)
{
	int pid, expected_grandchild = 0, found_grandchild = 0;
	const unsigned int test_options = PTRACE_O_TRACECLONE |
					  PTRACE_O_TRACEFORK |
					  PTRACE_O_TRACEVFORK;

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");
	if (pid == 0) {
		/* Test child: become traced, stop, then fork a grandchild. */
		pid = getpid();
		if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
					   __func__);
		kill(pid, SIGSTOP);
		if (fork() < 0)
			perror_msg_and_die("fork");
		_exit(0);
	}

	/* Tracer side: drive the child (and grandchild) until no children remain. */
	while (1) {
		int status, tracee_pid;

		errno = 0;
		tracee_pid = wait(&status);
		if (tracee_pid <= 0) {
			if (errno == EINTR)
				continue;
			else if (errno == ECHILD)
				break; /* everything has been reaped: evaluate the result */
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("%s: unexpected wait result %d",
					   __func__, tracee_pid);
		}
		if (WIFEXITED(status)) {
			if (WEXITSTATUS(status)) {
				if (tracee_pid != pid)
					kill_save_errno(pid, SIGKILL);
				error_msg_and_die("%s: unexpected exit status %u",
						  __func__, WEXITSTATUS(status));
			}
			continue;
		}
		if (WIFSIGNALED(status)) {
			if (tracee_pid != pid)
				kill_save_errno(pid, SIGKILL);
			error_msg_and_die("%s: unexpected signal %u",
					  __func__, WTERMSIG(status));
		}
		if (!WIFSTOPPED(status)) {
			if (tracee_pid != pid)
				kill_save_errno(tracee_pid, SIGKILL);
			kill(pid, SIGKILL);
			error_msg_and_die("%s: unexpected wait status %x",
					  __func__, status);
		}
		if (tracee_pid != pid) {
			/* A stop from some process other than our child: the grandchild. */
			found_grandchild = tracee_pid;
			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
				kill_save_errno(tracee_pid, SIGKILL);
				kill_save_errno(pid, SIGKILL);
				perror_msg_and_die("PTRACE_CONT doesn't work");
			}
			continue;
		}
		switch (WSTOPSIG(status)) {
		case SIGSTOP:
			/*
			 * Child's self-inflicted stop: try to enable the options.
			 * EINVAL/EIO are not reported here; in that case the test
			 * simply ends up failing below.
			 */
			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
			    && errno != EINVAL && errno != EIO)
				perror_msg("PTRACE_SETOPTIONS");
			break;
		case SIGTRAP:
			/* The ptrace event code is carried in the bits above bit 15 of status. */
			if (status >> 16 == PTRACE_EVENT_FORK) {
				long msg = 0;

				if (ptrace(PTRACE_GETEVENTMSG, pid,
					   NULL, (long) &msg) == 0)
					expected_grandchild = msg;
			}
			break;
		}
		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
		}
	}
	if (expected_grandchild && expected_grandchild == found_grandchild) {
		ptrace_setoptions |= test_options;
		if (debug)
			fprintf(stderr, "ptrace_setoptions = %#x\n",
				ptrace_setoptions);
		return;
	}
	error_msg("Test for PTRACE_O_TRACECLONE failed, "
		  "giving up using this feature.");
}
846
/*
 * Test whether the kernel supports PTRACE_O_TRACESYSGOOD.
 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
 * and then see whether it will stop with (SIGTRAP | 0x80).
 *
 * Use of this option enables correct handling of user-generated SIGTRAPs,
 * and SIGTRAPs generated by special instructions such as int3 on x86:
 * _start:	.globl	_start
 *		int3
 *		movl	$42, %ebx
 *		movl	$1, %eax
 *		int	$0x80
 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
 *
 * On success syscall_trap_sig becomes (SIGTRAP | 0x80) and test_options
 * (which also contains PTRACE_O_TRACEEXEC) is OR-ed into
 * ptrace_setoptions.  On failure a message is printed and both are left
 * unchanged.  Unexpected wait results are fatal.
 */
static void
test_ptrace_setoptions_for_all(void)
{
	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
					  PTRACE_O_TRACEEXEC;
	int pid;
	int it_worked = 0;

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");

	if (pid == 0) {
		/* Test child: become traced, stop, then exit normally. */
		pid = getpid();
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
			/* Note: exits with exitcode 1 */
			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
					   __func__);
		kill(pid, SIGSTOP);
		_exit(0); /* parent should see entry into this syscall */
	}

	/* Tracer side: step the child syscall by syscall until it exits. */
	while (1) {
		int status, tracee_pid;

		errno = 0;
		tracee_pid = wait(&status);
		if (tracee_pid <= 0) {
			if (errno == EINTR)
				continue;
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("%s: unexpected wait result %d",
					   __func__, tracee_pid);
		}
		if (WIFEXITED(status)) {
			if (WEXITSTATUS(status) == 0)
				break; /* clean exit: evaluate the result */
			error_msg_and_die("%s: unexpected exit status %u",
					  __func__, WEXITSTATUS(status));
		}
		if (WIFSIGNALED(status)) {
			error_msg_and_die("%s: unexpected signal %u",
					  __func__, WTERMSIG(status));
		}
		if (!WIFSTOPPED(status)) {
			kill(pid, SIGKILL);
			error_msg_and_die("%s: unexpected wait status %x",
					  __func__, status);
		}
		if (WSTOPSIG(status) == SIGSTOP) {
			/*
			 * We don't check "options aren't accepted" error.
			 * If it happens, we'll never get (SIGTRAP | 0x80),
			 * and thus will decide to not use the option.
			 * IOW: the outcome of the test will be correct.
			 */
			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
			    && errno != EINVAL && errno != EIO)
				perror_msg("PTRACE_SETOPTIONS");
		}
		/* A syscall stop flagged with 0x80 proves the option took effect. */
		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
			it_worked = 1;
		}
		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
			kill_save_errno(pid, SIGKILL);
			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
		}
	}

	if (it_worked) {
		syscall_trap_sig = (SIGTRAP | 0x80);
		ptrace_setoptions |= test_options;
		if (debug)
			fprintf(stderr, "ptrace_setoptions = %#x\n",
				ptrace_setoptions);
		return;
	}

	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
		  "giving up using this feature.");
}
942#endif
943
944int
945main(int argc, char *argv[])
946{
947	struct tcb *tcp;
948	int c, pid = 0;
949	int optF = 0;
950	struct sigaction sa;
951
952	progname = argv[0] ? argv[0] : "strace";
953
954	strace_tracer_pid = getpid();
955
956	/* Allocate the initial tcbtab.  */
957	tcbtabsize = argc;	/* Surely enough for all -p args.  */
958	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
959	if (!tcbtab)
960		die_out_of_memory();
961	tcp = calloc(tcbtabsize, sizeof(*tcp));
962	if (!tcp)
963		die_out_of_memory();
964	for (c = 0; c < tcbtabsize; c++)
965		tcbtab[c] = tcp++;
966
967	outf = stderr;
968	interactive = 1;
969	set_sortby(DEFAULT_SORTBY);
970	set_personality(DEFAULT_PERSONALITY);
971	qualify("trace=all");
972	qualify("abbrev=all");
973	qualify("verbose=all");
974	qualify("signal=all");
975	while ((c = getopt(argc, argv,
976		"+cCdfFhiqrtTvVxyz"
977#ifndef USE_PROCFS
978		"D"
979#endif
980		"a:e:o:O:p:s:S:u:E:P:")) != EOF) {
981		switch (c) {
982		case 'c':
983			if (cflag == CFLAG_BOTH) {
984				error_msg_and_die("-c and -C are mutually exclusive options");
985			}
986			cflag = CFLAG_ONLY_STATS;
987			break;
988		case 'C':
989			if (cflag == CFLAG_ONLY_STATS) {
990				error_msg_and_die("-c and -C are mutually exclusive options");
991			}
992			cflag = CFLAG_BOTH;
993			break;
994		case 'd':
995			debug++;
996			break;
997#ifndef USE_PROCFS
998		case 'D':
999			daemonized_tracer = 1;
1000			break;
1001#endif
1002		case 'F':
1003			optF = 1;
1004			break;
1005		case 'f':
1006			followfork++;
1007			break;
1008		case 'h':
1009			usage(stdout, 0);
1010			break;
1011		case 'i':
1012			iflag++;
1013			break;
1014		case 'q':
1015			qflag++;
1016			break;
1017		case 'r':
1018			rflag++;
1019			tflag++;
1020			break;
1021		case 't':
1022			tflag++;
1023			break;
1024		case 'T':
1025			dtime++;
1026			break;
1027		case 'x':
1028			xflag++;
1029			break;
1030		case 'y':
1031			show_fd_path = 1;
1032			break;
1033		case 'v':
1034			qualify("abbrev=none");
1035			break;
1036		case 'V':
1037			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1038			exit(0);
1039			break;
1040		case 'z':
1041			not_failing_only = 1;
1042			break;
1043		case 'a':
1044			acolumn = atoi(optarg);
1045			if (acolumn < 0)
1046				error_msg_and_die("Bad column width '%s'", optarg);
1047			break;
1048		case 'e':
1049			qualify(optarg);
1050			break;
1051		case 'o':
1052			outfname = strdup(optarg);
1053			break;
1054		case 'O':
1055			set_overhead(atoi(optarg));
1056			break;
1057		case 'p':
1058			pid = atoi(optarg);
1059			if (pid <= 0) {
1060				error_msg("Invalid process id: '%s'", optarg);
1061				break;
1062			}
1063			if (pid == strace_tracer_pid) {
1064				error_msg("I'm sorry, I can't let you do that, Dave.");
1065				break;
1066			}
1067			tcp = alloc_tcb(pid, 0);
1068			tcp->flags |= TCB_ATTACHED;
1069			pflag_seen++;
1070			break;
1071		case 'P':
1072			tracing_paths = 1;
1073			if (pathtrace_select(optarg)) {
1074				error_msg_and_die("Failed to select path '%s'", optarg);
1075			}
1076			break;
1077		case 's':
1078			max_strlen = atoi(optarg);
1079			if (max_strlen < 0) {
1080				error_msg_and_die("Invalid -s argument: '%s'", optarg);
1081			}
1082			break;
1083		case 'S':
1084			set_sortby(optarg);
1085			break;
1086		case 'u':
1087			username = strdup(optarg);
1088			break;
1089		case 'E':
1090			if (putenv(optarg) < 0)
1091				die_out_of_memory();
1092			break;
1093		default:
1094			usage(stderr, 1);
1095			break;
1096		}
1097	}
1098
1099	acolumn_spaces = malloc(acolumn + 1);
1100	if (!acolumn_spaces)
1101		die_out_of_memory();
1102	memset(acolumn_spaces, ' ', acolumn);
1103	acolumn_spaces[acolumn] = '\0';
1104
1105	if ((optind == argc) == !pflag_seen)
1106		usage(stderr, 1);
1107
1108	if (pflag_seen && daemonized_tracer) {
1109		error_msg_and_die("-D and -p are mutually exclusive options");
1110	}
1111
1112	if (!followfork)
1113		followfork = optF;
1114
1115	if (followfork > 1 && cflag) {
1116		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1117	}
1118
1119	/* See if they want to run as another user. */
1120	if (username != NULL) {
1121		struct passwd *pent;
1122
1123		if (getuid() != 0 || geteuid() != 0) {
1124			error_msg_and_die("You must be root to use the -u option");
1125		}
1126		pent = getpwnam(username);
1127		if (pent == NULL) {
1128			error_msg_and_die("Cannot find user '%s'", username);
1129		}
1130		run_uid = pent->pw_uid;
1131		run_gid = pent->pw_gid;
1132	}
1133	else {
1134		run_uid = getuid();
1135		run_gid = getgid();
1136	}
1137
1138#ifdef LINUX
1139	if (followfork)
1140		test_ptrace_setoptions_followfork();
1141	test_ptrace_setoptions_for_all();
1142#endif
1143
1144	/* Check if they want to redirect the output. */
1145	if (outfname) {
1146		/* See if they want to pipe the output. */
1147		if (outfname[0] == '|' || outfname[0] == '!') {
1148			/*
1149			 * We can't do the <outfname>.PID funny business
1150			 * when using popen, so prohibit it.
1151			 */
1152			if (followfork > 1)
1153				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1154			outf = strace_popen(outfname + 1);
1155		}
1156		else if (followfork <= 1)
1157			outf = strace_fopen(outfname);
1158	}
1159
1160	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1161		static char buf[BUFSIZ];
1162		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1163	}
1164	if (outfname && optind < argc) {
1165		interactive = 0;
1166		qflag = 1;
1167	}
1168
1169	/* Valid states here:
1170	   optind < argc	pflag_seen	outfname	interactive
1171	   1			0		0		1
1172	   0			1		0		1
1173	   1			0		1		0
1174	   0			1		1		1
1175	 */
1176
1177	/* STARTUP_CHILD must be called before the signal handlers get
1178	   installed below as they are inherited into the spawned process.
1179	   Also we do not need to be protected by them as during interruption
1180	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1181	if (!pflag_seen)
1182		startup_child(&argv[optind]);
1183
1184	sigemptyset(&empty_set);
1185	sigemptyset(&blocked_set);
1186	sa.sa_handler = SIG_IGN;
1187	sigemptyset(&sa.sa_mask);
1188	sa.sa_flags = 0;
1189	sigaction(SIGTTOU, &sa, NULL);
1190	sigaction(SIGTTIN, &sa, NULL);
1191	if (interactive) {
1192		sigaddset(&blocked_set, SIGHUP);
1193		sigaddset(&blocked_set, SIGINT);
1194		sigaddset(&blocked_set, SIGQUIT);
1195		sigaddset(&blocked_set, SIGPIPE);
1196		sigaddset(&blocked_set, SIGTERM);
1197		sa.sa_handler = interrupt;
1198#ifdef SUNOS4
1199		/* POSIX signals on sunos4.1 are a little broken. */
1200		sa.sa_flags = SA_INTERRUPT;
1201#endif /* SUNOS4 */
1202	}
1203	sigaction(SIGHUP, &sa, NULL);
1204	sigaction(SIGINT, &sa, NULL);
1205	sigaction(SIGQUIT, &sa, NULL);
1206	sigaction(SIGPIPE, &sa, NULL);
1207	sigaction(SIGTERM, &sa, NULL);
1208#ifdef USE_PROCFS
1209	sa.sa_handler = reaper;
1210	sigaction(SIGCHLD, &sa, NULL);
1211#else
1212	/* Make sure SIGCHLD has the default action so that waitpid
1213	   definitely works without losing track of children.  The user
1214	   should not have given us a bogus state to inherit, but he might
1215	   have.  Arguably we should detect SIG_IGN here and pass it on
1216	   to children, but probably noone really needs that.  */
1217	sa.sa_handler = SIG_DFL;
1218	sigaction(SIGCHLD, &sa, NULL);
1219#endif /* USE_PROCFS */
1220
1221	if (pflag_seen || daemonized_tracer)
1222		startup_attach();
1223
1224	if (trace() < 0)
1225		exit(1);
1226	cleanup();
1227	fflush(NULL);
1228	if (exit_code > 0xff) {
1229		/* Child was killed by a signal, mimic that.  */
1230		exit_code &= 0xff;
1231		signal(exit_code, SIG_DFL);
1232		raise(exit_code);
1233		/* Paranoia - what if this signal is not fatal?
1234		   Exit with 128 + signo then.  */
1235		exit_code += 128;
1236	}
1237	exit(exit_code);
1238}
1239
1240static void
1241expand_tcbtab(void)
1242{
1243	/* Allocate some more TCBs and expand the table.
1244	   We don't want to relocate the TCBs because our
1245	   callers have pointers and it would be a pain.
1246	   So tcbtab is a table of pointers.  Since we never
1247	   free the TCBs, we allocate a single chunk of many.  */
1248	int i = tcbtabsize;
1249	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1250	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1251	if (!newtab || !newtcbs)
1252		die_out_of_memory();
1253	tcbtabsize *= 2;
1254	tcbtab = newtab;
1255	while (i < tcbtabsize)
1256		tcbtab[i++] = newtcbs++;
1257}
1258
1259struct tcb *
1260alloc_tcb(int pid, int command_options_parsed)
1261{
1262	int i;
1263	struct tcb *tcp;
1264
1265	if (nprocs == tcbtabsize)
1266		expand_tcbtab();
1267
1268	for (i = 0; i < tcbtabsize; i++) {
1269		tcp = tcbtab[i];
1270		if ((tcp->flags & TCB_INUSE) == 0) {
1271			memset(tcp, 0, sizeof(*tcp));
1272			tcp->pid = pid;
1273			tcp->flags = TCB_INUSE | TCB_STARTUP;
1274			tcp->outf = outf; /* Initialise to current out file */
1275#ifdef USE_PROCFS
1276			tcp->pfd = -1;
1277#endif
1278			nprocs++;
1279			if (debug)
1280				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1281			if (command_options_parsed)
1282				newoutf(tcp);
1283			return tcp;
1284		}
1285	}
1286	error_msg_and_die("bug in alloc_tcb");
1287}
1288
1289#ifdef USE_PROCFS
1290int
1291proc_open(struct tcb *tcp, int attaching)
1292{
1293	char proc[32];
1294	long arg;
1295#ifdef SVR4
1296	int i;
1297	sysset_t syscalls;
1298	sigset_t signals;
1299	fltset_t faults;
1300#endif
1301#ifndef HAVE_POLLABLE_PROCFS
1302	static int last_pfd;
1303#endif
1304
1305#ifdef HAVE_MP_PROCFS
1306	/* Open the process pseudo-files in /proc. */
1307	sprintf(proc, "/proc/%d/ctl", tcp->pid);
1308	tcp->pfd = open(proc, O_WRONLY|O_EXCL);
1309	if (tcp->pfd < 0) {
1310		perror("strace: open(\"/proc/...\", ...)");
1311		return -1;
1312	}
1313	set_cloexec_flag(tcp->pfd);
1314	sprintf(proc, "/proc/%d/status", tcp->pid);
1315	tcp->pfd_stat = open(proc, O_RDONLY|O_EXCL);
1316	if (tcp->pfd_stat < 0) {
1317		perror("strace: open(\"/proc/...\", ...)");
1318		return -1;
1319	}
1320	set_cloexec_flag(tcp->pfd_stat);
1321	sprintf(proc, "/proc/%d/as", tcp->pid);
1322	tcp->pfd_as = open(proc, O_RDONLY|O_EXCL);
1323	if (tcp->pfd_as < 0) {
1324		perror("strace: open(\"/proc/...\", ...)");
1325		return -1;
1326	}
1327	set_cloexec_flag(tcp->pfd_as);
1328#else
1329	/* Open the process pseudo-file in /proc. */
1330# ifndef FREEBSD
1331	sprintf(proc, "/proc/%d", tcp->pid);
1332	tcp->pfd = open(proc, O_RDWR|O_EXCL);
1333# else
1334	sprintf(proc, "/proc/%d/mem", tcp->pid);
1335	tcp->pfd = open(proc, O_RDWR);
1336# endif
1337	if (tcp->pfd < 0) {
1338		perror("strace: open(\"/proc/...\", ...)");
1339		return -1;
1340	}
1341	set_cloexec_flag(tcp->pfd);
1342#endif
1343#ifdef FREEBSD
1344	sprintf(proc, "/proc/%d/regs", tcp->pid);
1345	tcp->pfd_reg = open(proc, O_RDONLY);
1346	if (tcp->pfd_reg < 0) {
1347		perror("strace: open(\"/proc/.../regs\", ...)");
1348		return -1;
1349	}
1350	if (cflag) {
1351		sprintf(proc, "/proc/%d/status", tcp->pid);
1352		tcp->pfd_status = open(proc, O_RDONLY);
1353		if (tcp->pfd_status < 0) {
1354			perror("strace: open(\"/proc/.../status\", ...)");
1355			return -1;
1356		}
1357	} else
1358		tcp->pfd_status = -1;
1359#endif /* FREEBSD */
1360	rebuild_pollv();
1361	if (!attaching) {
1362		/*
1363		 * Wait for the child to pause.  Because of a race
1364		 * condition we have to poll for the event.
1365		 */
1366		for (;;) {
1367			if (IOCTL_STATUS(tcp) < 0) {
1368				perror("strace: PIOCSTATUS");
1369				return -1;
1370			}
1371			if (tcp->status.PR_FLAGS & PR_ASLEEP)
1372				break;
1373		}
1374	}
1375#ifndef FREEBSD
1376	/* Stop the process so that we own the stop. */
1377	if (IOCTL(tcp->pfd, PIOCSTOP, (char *)NULL) < 0) {
1378		perror("strace: PIOCSTOP");
1379		return -1;
1380	}
1381#endif
1382#ifdef PIOCSET
1383	/* Set Run-on-Last-Close. */
1384	arg = PR_RLC;
1385	if (IOCTL(tcp->pfd, PIOCSET, &arg) < 0) {
1386		perror("PIOCSET PR_RLC");
1387		return -1;
1388	}
1389	/* Set or Reset Inherit-on-Fork. */
1390	arg = PR_FORK;
1391	if (IOCTL(tcp->pfd, followfork ? PIOCSET : PIOCRESET, &arg) < 0) {
1392		perror("PIOC{SET,RESET} PR_FORK");
1393		return -1;
1394	}
1395#else  /* !PIOCSET */
1396#ifndef FREEBSD
1397	if (ioctl(tcp->pfd, PIOCSRLC) < 0) {
1398		perror("PIOCSRLC");
1399		return -1;
1400	}
1401	if (ioctl(tcp->pfd, followfork ? PIOCSFORK : PIOCRFORK) < 0) {
1402		perror("PIOC{S,R}FORK");
1403		return -1;
1404	}
1405#else /* FREEBSD */
1406	/* just unset the PF_LINGER flag for the Run-on-Last-Close. */
1407	if (ioctl(tcp->pfd, PIOCGFL, &arg) < 0) {
1408	        perror("PIOCGFL");
1409		return -1;
1410	}
1411	arg &= ~PF_LINGER;
1412	if (ioctl(tcp->pfd, PIOCSFL, arg) < 0) {
1413		perror("PIOCSFL");
1414		return -1;
1415	}
1416#endif /* FREEBSD */
1417#endif /* !PIOCSET */
1418#ifndef FREEBSD
1419	/* Enable all syscall entries we care about. */
1420	premptyset(&syscalls);
1421	for (i = 1; i < MAX_QUALS; ++i) {
1422		if (i > (sizeof syscalls) * CHAR_BIT) break;
1423		if (qual_flags[i] & QUAL_TRACE) praddset(&syscalls, i);
1424	}
1425	praddset(&syscalls, SYS_execve);
1426	if (followfork) {
1427		praddset(&syscalls, SYS_fork);
1428#ifdef SYS_forkall
1429		praddset(&syscalls, SYS_forkall);
1430#endif
1431#ifdef SYS_fork1
1432		praddset(&syscalls, SYS_fork1);
1433#endif
1434#ifdef SYS_rfork1
1435		praddset(&syscalls, SYS_rfork1);
1436#endif
1437#ifdef SYS_rforkall
1438		praddset(&syscalls, SYS_rforkall);
1439#endif
1440	}
1441	if (IOCTL(tcp->pfd, PIOCSENTRY, &syscalls) < 0) {
1442		perror("PIOCSENTRY");
1443		return -1;
1444	}
1445	/* Enable the syscall exits. */
1446	if (IOCTL(tcp->pfd, PIOCSEXIT, &syscalls) < 0) {
1447		perror("PIOSEXIT");
1448		return -1;
1449	}
1450	/* Enable signals we care about. */
1451	premptyset(&signals);
1452	for (i = 1; i < MAX_QUALS; ++i) {
1453		if (i > (sizeof signals) * CHAR_BIT) break;
1454		if (qual_flags[i] & QUAL_SIGNAL) praddset(&signals, i);
1455	}
1456	if (IOCTL(tcp->pfd, PIOCSTRACE, &signals) < 0) {
1457		perror("PIOCSTRACE");
1458		return -1;
1459	}
1460	/* Enable faults we care about */
1461	premptyset(&faults);
1462	for (i = 1; i < MAX_QUALS; ++i) {
1463		if (i > (sizeof faults) * CHAR_BIT) break;
1464		if (qual_flags[i] & QUAL_FAULT) praddset(&faults, i);
1465	}
1466	if (IOCTL(tcp->pfd, PIOCSFAULT, &faults) < 0) {
1467		perror("PIOCSFAULT");
1468		return -1;
1469	}
1470#else /* FREEBSD */
1471	/* set events flags. */
1472	arg = S_SIG | S_SCE | S_SCX;
1473	if (ioctl(tcp->pfd, PIOCBIS, arg) < 0) {
1474		perror("PIOCBIS");
1475		return -1;
1476	}
1477#endif /* FREEBSD */
1478	if (!attaching) {
1479#ifdef MIPS
1480		/*
1481		 * The SGI PRSABORT doesn't work for pause() so
1482		 * we send it a caught signal to wake it up.
1483		 */
1484		kill(tcp->pid, SIGINT);
1485#else /* !MIPS */
1486#ifdef PRSABORT
1487		/* The child is in a pause(), abort it. */
1488		arg = PRSABORT;
1489		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1490			perror("PIOCRUN");
1491			return -1;
1492		}
1493#endif
1494#endif /* !MIPS*/
1495#ifdef FREEBSD
1496		/* wake up the child if it received the SIGSTOP */
1497		kill(tcp->pid, SIGCONT);
1498#endif
1499		for (;;) {
1500			/* Wait for the child to do something. */
1501			if (IOCTL_WSTOP(tcp) < 0) {
1502				perror("PIOCWSTOP");
1503				return -1;
1504			}
1505			if (tcp->status.PR_WHY == PR_SYSENTRY) {
1506				tcp->flags &= ~TCB_INSYSCALL;
1507				get_scno(tcp);
1508				if (known_scno(tcp) == SYS_execve)
1509					break;
1510			}
1511			/* Set it running: maybe execve will be next. */
1512#ifndef FREEBSD
1513			arg = 0;
1514			if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
1515#else
1516			if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
1517#endif
1518			{
1519				perror("PIOCRUN");
1520				return -1;
1521			}
1522#ifdef FREEBSD
1523			/* handle the case where we "opened" the child before
1524			   it did the kill -STOP */
1525			if (tcp->status.PR_WHY == PR_SIGNALLED &&
1526			    tcp->status.PR_WHAT == SIGSTOP)
1527			        kill(tcp->pid, SIGCONT);
1528#endif
1529		}
1530	}
1531#ifdef FREEBSD
1532	else {
1533		if (attaching < 2) {
1534			/* We are attaching to an already running process.
1535			 * Try to figure out the state of the process in syscalls,
1536			 * to handle the first event well.
1537			 * This is done by having a look at the "wchan" property of the
1538			 * process, which tells where it is stopped (if it is). */
1539			FILE * status;
1540			char wchan[20]; /* should be enough */
1541
1542			sprintf(proc, "/proc/%d/status", tcp->pid);
1543			status = fopen(proc, "r");
1544			if (status &&
1545			    (fscanf(status, "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d"
1546				    "%*d,%*d %*d,%*d %19s", wchan) == 1) &&
1547			    strcmp(wchan, "nochan") && strcmp(wchan, "spread") &&
1548			    strcmp(wchan, "stopevent")) {
1549				/* The process is asleep in the middle of a syscall.
1550				   Fake the syscall entry event */
1551				tcp->flags &= ~(TCB_INSYSCALL|TCB_STARTUP);
1552				tcp->status.PR_WHY = PR_SYSENTRY;
1553				trace_syscall(tcp);
1554			}
1555			if (status)
1556				fclose(status);
1557		} /* otherwise it's a fork being followed */
1558	}
1559#endif /* FREEBSD */
1560#ifndef HAVE_POLLABLE_PROCFS
1561	if (proc_poll_pipe[0] != -1)
1562		proc_poller(tcp->pfd);
1563	else if (nprocs > 1) {
1564		proc_poll_open();
1565		proc_poller(last_pfd);
1566		proc_poller(tcp->pfd);
1567	}
1568	last_pfd = tcp->pfd;
1569#endif /* !HAVE_POLLABLE_PROCFS */
1570	return 0;
1571}
1572
1573#endif /* USE_PROCFS */
1574
1575struct tcb *
1576pid2tcb(int pid)
1577{
1578	int i;
1579
1580	if (pid <= 0)
1581		return NULL;
1582
1583	for (i = 0; i < tcbtabsize; i++) {
1584		struct tcb *tcp = tcbtab[i];
1585		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1586			return tcp;
1587	}
1588
1589	return NULL;
1590}
1591
1592#ifdef USE_PROCFS
1593
1594static struct tcb *
1595first_used_tcb(void)
1596{
1597	int i;
1598	struct tcb *tcp;
1599	for (i = 0; i < tcbtabsize; i++) {
1600		tcp = tcbtab[i];
1601		if (tcp->flags & TCB_INUSE)
1602			return tcp;
1603	}
1604	return NULL;
1605}
1606
1607static struct tcb *
1608pfd2tcb(int pfd)
1609{
1610	int i;
1611
1612	for (i = 0; i < tcbtabsize; i++) {
1613		struct tcb *tcp = tcbtab[i];
1614		if (tcp->pfd != pfd)
1615			continue;
1616		if (tcp->flags & TCB_INUSE)
1617			return tcp;
1618	}
1619	return NULL;
1620}
1621
1622#endif /* USE_PROCFS */
1623
1624void
1625droptcb(struct tcb *tcp)
1626{
1627	if (tcp->pid == 0)
1628		return;
1629
1630	nprocs--;
1631	if (debug)
1632		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1633
1634#ifdef USE_PROCFS
1635	if (tcp->pfd != -1) {
1636		close(tcp->pfd);
1637		tcp->pfd = -1;
1638# ifdef FREEBSD
1639		if (tcp->pfd_reg != -1) {
1640		        close(tcp->pfd_reg);
1641		        tcp->pfd_reg = -1;
1642		}
1643		if (tcp->pfd_status != -1) {
1644			close(tcp->pfd_status);
1645			tcp->pfd_status = -1;
1646		}
1647# endif
1648		tcp->flags = 0; /* rebuild_pollv needs it */
1649		rebuild_pollv();
1650	}
1651#endif
1652
1653	if (outfname && followfork > 1 && tcp->outf)
1654		fclose(tcp->outf);
1655
1656	memset(tcp, 0, sizeof(*tcp));
1657}
1658
1659/* detach traced process; continue with sig
1660   Never call DETACH twice on the same process as both unattached and
1661   attached-unstopped processes give the same ESRCH.  For unattached process we
1662   would SIGSTOP it and wait for its SIGSTOP notification forever.  */
1663
1664static int
1665detach(struct tcb *tcp, int sig)
1666{
1667	int error = 0;
1668#ifdef LINUX
1669	int status, catch_sigstop;
1670#endif
1671
1672	if (tcp->flags & TCB_BPTSET)
1673		clearbpt(tcp);
1674
1675#ifdef LINUX
1676	/*
1677	 * Linux wrongly insists the child be stopped
1678	 * before detaching.  Arghh.  We go through hoops
1679	 * to make a clean break of things.
1680	 */
1681#if defined(SPARC)
1682#undef PTRACE_DETACH
1683#define PTRACE_DETACH PTRACE_SUNDETACH
1684#endif
1685	/*
1686	 * On TCB_STARTUP we did PTRACE_ATTACH but still did not get the
1687	 * expected SIGSTOP.  We must catch exactly one as otherwise the
1688	 * detached process would be left stopped (process state T).
1689	 */
1690	catch_sigstop = (tcp->flags & TCB_STARTUP);
1691	error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, sig);
1692	if (error == 0) {
1693		/* On a clear day, you can see forever. */
1694	}
1695	else if (errno != ESRCH) {
1696		/* Shouldn't happen. */
1697		perror("detach: ptrace(PTRACE_DETACH, ...)");
1698	}
1699	else if (my_tkill(tcp->pid, 0) < 0) {
1700		if (errno != ESRCH)
1701			perror("detach: checking sanity");
1702	}
1703	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
1704		if (errno != ESRCH)
1705			perror("detach: stopping child");
1706	}
1707	else
1708		catch_sigstop = 1;
1709	if (catch_sigstop) {
1710		for (;;) {
1711#ifdef __WALL
1712			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
1713				if (errno == ECHILD) /* Already gone.  */
1714					break;
1715				if (errno != EINVAL) {
1716					perror("detach: waiting");
1717					break;
1718				}
1719#endif /* __WALL */
1720				/* No __WALL here.  */
1721				if (waitpid(tcp->pid, &status, 0) < 0) {
1722					if (errno != ECHILD) {
1723						perror("detach: waiting");
1724						break;
1725					}
1726#ifdef __WCLONE
1727					/* If no processes, try clones.  */
1728					if (wait4(tcp->pid, &status, __WCLONE,
1729						  NULL) < 0) {
1730						if (errno != ECHILD)
1731							perror("detach: waiting");
1732						break;
1733					}
1734#endif /* __WCLONE */
1735				}
1736#ifdef __WALL
1737			}
1738#endif
1739			if (!WIFSTOPPED(status)) {
1740				/* Au revoir, mon ami. */
1741				break;
1742			}
1743			if (WSTOPSIG(status) == SIGSTOP) {
1744				ptrace_restart(PTRACE_DETACH, tcp, sig);
1745				break;
1746			}
1747			error = ptrace_restart(PTRACE_CONT, tcp,
1748					WSTOPSIG(status) == syscall_trap_sig ? 0
1749					: WSTOPSIG(status));
1750			if (error < 0)
1751				break;
1752		}
1753	}
1754#endif /* LINUX */
1755
1756#if defined(SUNOS4)
1757	/* PTRACE_DETACH won't respect `sig' argument, so we post it here. */
1758	if (sig && kill(tcp->pid, sig) < 0)
1759		perror("detach: kill");
1760	sig = 0;
1761	error = ptrace_restart(PTRACE_DETACH, tcp, sig);
1762#endif /* SUNOS4 */
1763
1764	if (!qflag)
1765		fprintf(stderr, "Process %u detached\n", tcp->pid);
1766
1767	droptcb(tcp);
1768
1769	return error;
1770}
1771
1772#ifdef USE_PROCFS
1773
1774static void reaper(int sig)
1775{
1776	int pid;
1777	int status;
1778
1779	while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
1780	}
1781}
1782
1783#endif /* USE_PROCFS */
1784
1785static void
1786cleanup(void)
1787{
1788	int i;
1789	struct tcb *tcp;
1790
1791	for (i = 0; i < tcbtabsize; i++) {
1792		tcp = tcbtab[i];
1793		if (!(tcp->flags & TCB_INUSE))
1794			continue;
1795		if (debug)
1796			fprintf(stderr,
1797				"cleanup: looking at pid %u\n", tcp->pid);
1798		if (tcp_last &&
1799		    (!outfname || followfork < 2 || tcp_last == tcp)) {
1800			tprintf(" <unfinished ...>");
1801			printtrailer();
1802		}
1803		if (tcp->flags & TCB_ATTACHED)
1804			detach(tcp, 0);
1805		else {
1806			kill(tcp->pid, SIGCONT);
1807			kill(tcp->pid, SIGTERM);
1808		}
1809	}
1810	if (cflag)
1811		call_summary(outf);
1812}
1813
1814static void
1815interrupt(int sig)
1816{
1817	interrupted = 1;
1818}
1819
#ifndef HAVE_STRERROR

#if !HAVE_DECL_SYS_ERRLIST
extern int sys_nerr;
extern char *sys_errlist[];
#endif /* !HAVE_DECL_SYS_ERRLIST */

/*
 * Replacement strerror() for systems that lack one.
 *
 * Returns the sys_errlist[] entry for ERR_NO when 1 <= ERR_NO < sys_nerr.
 * Any other value yields "Unknown error N", formatted into a static
 * buffer that is overwritten by the next such call.
 */
const char *
strerror(int err_no)
{
	static char buf[64];

	if (err_no >= 1 && err_no < sys_nerr)
		return sys_errlist[err_no];

	sprintf(buf, "Unknown error %d", err_no);
	return buf;
}

#endif /* !HAVE_STRERROR */
1840
1841#ifndef HAVE_STRSIGNAL
1842
1843#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
1844extern char *sys_siglist[];
1845#endif
1846#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
1847extern char *_sys_siglist[];
1848#endif
1849
1850const char *
1851strsignal(int sig)
1852{
1853	static char buf[64];
1854
1855	if (sig < 1 || sig >= NSIG) {
1856		sprintf(buf, "Unknown signal %d", sig);
1857		return buf;
1858	}
1859#ifdef HAVE__SYS_SIGLIST
1860	return _sys_siglist[sig];
1861#else
1862	return sys_siglist[sig];
1863#endif
1864}
1865
1866#endif /* HAVE_STRSIGNAL */
1867
1868#ifdef USE_PROCFS
1869
1870static void
1871rebuild_pollv(void)
1872{
1873	int i, j;
1874
1875	free(pollv);
1876	pollv = malloc(nprocs * sizeof(pollv[0]));
1877	if (!pollv)
1878		die_out_of_memory();
1879
1880	for (i = j = 0; i < tcbtabsize; i++) {
1881		struct tcb *tcp = tcbtab[i];
1882		if (!(tcp->flags & TCB_INUSE))
1883			continue;
1884		pollv[j].fd = tcp->pfd;
1885		pollv[j].events = POLLWANT;
1886		j++;
1887	}
1888	if (j != nprocs) {
1889		error_msg_and_die("proc miscount");
1890	}
1891}
1892
1893#ifndef HAVE_POLLABLE_PROCFS
1894
1895static void
1896proc_poll_open(void)
1897{
1898	int i;
1899
1900	if (pipe(proc_poll_pipe) < 0) {
1901		perror_msg_and_die("pipe");
1902	}
1903	for (i = 0; i < 2; i++) {
1904		set_cloexec_flag(proc_poll_pipe[i]);
1905	}
1906}
1907
1908static int
1909proc_poll(struct pollfd *pollv, int nfds, int timeout)
1910{
1911	int i;
1912	int n;
1913	struct proc_pollfd pollinfo;
1914
1915	n = read(proc_poll_pipe[0], &pollinfo, sizeof(pollinfo));
1916	if (n < 0)
1917		return n;
1918	if (n != sizeof(struct proc_pollfd)) {
1919		error_msg_and_die("panic: short read: %d", n);
1920	}
1921	for (i = 0; i < nprocs; i++) {
1922		if (pollv[i].fd == pollinfo.fd)
1923			pollv[i].revents = pollinfo.revents;
1924		else
1925			pollv[i].revents = 0;
1926	}
1927	poller_pid = pollinfo.pid;
1928	return 1;
1929}
1930
/*
 * Deliberately empty signal handler.  proc_poller() installs it for
 * SIGUSR1 so that the signal ends its sigsuspend() without doing
 * anything else.
 */
static void
wakeup_handler(int sig)
{
	(void) sig;
}
1935
1936static void
1937proc_poller(int pfd)
1938{
1939	struct proc_pollfd pollinfo;
1940	struct sigaction sa;
1941	sigset_t blocked_set, empty_set;
1942	int i;
1943	int n;
1944	struct rlimit rl;
1945#ifdef FREEBSD
1946	struct procfs_status pfs;
1947#endif /* FREEBSD */
1948
1949	switch (fork()) {
1950	case -1:
1951		perror_msg_and_die("fork");
1952	case 0:
1953		break;
1954	default:
1955		return;
1956	}
1957
1958	sa.sa_handler = interactive ? SIG_DFL : SIG_IGN;
1959	sa.sa_flags = 0;
1960	sigemptyset(&sa.sa_mask);
1961	sigaction(SIGHUP, &sa, NULL);
1962	sigaction(SIGINT, &sa, NULL);
1963	sigaction(SIGQUIT, &sa, NULL);
1964	sigaction(SIGPIPE, &sa, NULL);
1965	sigaction(SIGTERM, &sa, NULL);
1966	sa.sa_handler = wakeup_handler;
1967	sigaction(SIGUSR1, &sa, NULL);
1968	sigemptyset(&blocked_set);
1969	sigaddset(&blocked_set, SIGUSR1);
1970	sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1971	sigemptyset(&empty_set);
1972
1973	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
1974		perror_msg_and_die("getrlimit(RLIMIT_NOFILE, ...)");
1975	}
1976	n = rl.rlim_cur;
1977	for (i = 0; i < n; i++) {
1978		if (i != pfd && i != proc_poll_pipe[1])
1979			close(i);
1980	}
1981
1982	pollinfo.fd = pfd;
1983	pollinfo.pid = getpid();
1984	for (;;) {
1985#ifndef FREEBSD
1986		if (ioctl(pfd, PIOCWSTOP, NULL) < 0)
1987#else
1988		if (ioctl(pfd, PIOCWSTOP, &pfs) < 0)
1989#endif
1990		{
1991			switch (errno) {
1992			case EINTR:
1993				continue;
1994			case EBADF:
1995				pollinfo.revents = POLLERR;
1996				break;
1997			case ENOENT:
1998				pollinfo.revents = POLLHUP;
1999				break;
2000			default:
2001				perror("proc_poller: PIOCWSTOP");
2002			}
2003			write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
2004			_exit(0);
2005		}
2006		pollinfo.revents = POLLWANT;
2007		write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
2008		sigsuspend(&empty_set);
2009	}
2010}
2011
2012#endif /* !HAVE_POLLABLE_PROCFS */
2013
2014static int
2015choose_pfd()
2016{
2017	int i, j;
2018	struct tcb *tcp;
2019
2020	static int last;
2021
2022	if (followfork < 2 &&
2023	    last < nprocs && (pollv[last].revents & POLLWANT)) {
2024		/*
2025		 * The previous process is ready to run again.  We'll
2026		 * let it do so if it is currently in a syscall.  This
2027		 * heuristic improves the readability of the trace.
2028		 */
2029		tcp = pfd2tcb(pollv[last].fd);
2030		if (tcp && exiting(tcp))
2031			return pollv[last].fd;
2032	}
2033
2034	for (i = 0; i < nprocs; i++) {
2035		/* Let competing children run round robin. */
2036		j = (i + last + 1) % nprocs;
2037		if (pollv[j].revents & (POLLHUP | POLLERR)) {
2038			tcp = pfd2tcb(pollv[j].fd);
2039			if (!tcp) {
2040				error_msg_and_die("lost proc");
2041			}
2042			droptcb(tcp);
2043			return -1;
2044		}
2045		if (pollv[j].revents & POLLWANT) {
2046			last = j;
2047			return pollv[j].fd;
2048		}
2049	}
2050	error_msg_and_die("nothing ready");
2051}
2052
2053static int
2054trace(void)
2055{
2056#ifdef POLL_HACK
2057	struct tcb *in_syscall = NULL;
2058#endif
2059	struct tcb *tcp;
2060	int pfd;
2061	int what;
2062	int ioctl_result = 0, ioctl_errno = 0;
2063	long arg;
2064
2065	for (;;) {
2066		if (interactive)
2067			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2068
2069		if (nprocs == 0)
2070			break;
2071
2072		switch (nprocs) {
2073		case 1:
2074#ifndef HAVE_POLLABLE_PROCFS
2075			if (proc_poll_pipe[0] == -1) {
2076#endif
2077				tcp = first_used_tcb();
2078				if (!tcp)
2079					continue;
2080				pfd = tcp->pfd;
2081				if (pfd == -1)
2082					continue;
2083				break;
2084#ifndef HAVE_POLLABLE_PROCFS
2085			}
2086			/* fall through ... */
2087#endif /* !HAVE_POLLABLE_PROCFS */
2088		default:
2089#ifdef HAVE_POLLABLE_PROCFS
2090#ifdef POLL_HACK
2091		        /* On some systems (e.g. UnixWare) we get too much ugly
2092			   "unfinished..." stuff when multiple proceses are in
2093			   syscalls.  Here's a nasty hack */
2094
2095			if (in_syscall) {
2096				struct pollfd pv;
2097				tcp = in_syscall;
2098				in_syscall = NULL;
2099				pv.fd = tcp->pfd;
2100				pv.events = POLLWANT;
2101				what = poll(&pv, 1, 1);
2102				if (what < 0) {
2103					if (interrupted)
2104						return 0;
2105					continue;
2106				}
2107				else if (what == 1 && pv.revents & POLLWANT) {
2108					goto FOUND;
2109				}
2110			}
2111#endif
2112
2113			if (poll(pollv, nprocs, INFTIM) < 0) {
2114				if (interrupted)
2115					return 0;
2116				continue;
2117			}
2118#else /* !HAVE_POLLABLE_PROCFS */
2119			if (proc_poll(pollv, nprocs, INFTIM) < 0) {
2120				if (interrupted)
2121					return 0;
2122				continue;
2123			}
2124#endif /* !HAVE_POLLABLE_PROCFS */
2125			pfd = choose_pfd();
2126			if (pfd == -1)
2127				continue;
2128			break;
2129		}
2130
2131		/* Look up `pfd' in our table. */
2132		tcp = pfd2tcb(pfd);
2133		if (tcp == NULL) {
2134			error_msg_and_die("unknown pfd: %u", pfd);
2135		}
2136#ifdef POLL_HACK
2137	FOUND:
2138#endif
2139		/* Get the status of the process. */
2140		if (!interrupted) {
2141#ifndef FREEBSD
2142			ioctl_result = IOCTL_WSTOP(tcp);
2143#else /* FREEBSD */
2144			/* Thanks to some scheduling mystery, the first poller
2145			   sometimes waits for the already processed end of fork
2146			   event. Doing a non blocking poll here solves the problem. */
2147			if (proc_poll_pipe[0] != -1)
2148				ioctl_result = IOCTL_STATUS(tcp);
2149			else
2150				ioctl_result = IOCTL_WSTOP(tcp);
2151#endif /* FREEBSD */
2152			ioctl_errno = errno;
2153#ifndef HAVE_POLLABLE_PROCFS
2154			if (proc_poll_pipe[0] != -1) {
2155				if (ioctl_result < 0)
2156					kill(poller_pid, SIGKILL);
2157				else
2158					kill(poller_pid, SIGUSR1);
2159			}
2160#endif /* !HAVE_POLLABLE_PROCFS */
2161		}
2162		if (interrupted)
2163			return 0;
2164
2165		if (interactive)
2166			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2167
2168		if (ioctl_result < 0) {
2169			/* Find out what happened if it failed. */
2170			switch (ioctl_errno) {
2171			case EINTR:
2172			case EBADF:
2173				continue;
2174#ifdef FREEBSD
2175			case ENOTTY:
2176#endif
2177			case ENOENT:
2178				droptcb(tcp);
2179				continue;
2180			default:
2181				perror_msg_and_die("PIOCWSTOP");
2182			}
2183		}
2184
2185#ifdef FREEBSD
2186		if ((tcp->flags & TCB_STARTUP) && (tcp->status.PR_WHY == PR_SYSEXIT)) {
2187			/* discard first event for a syscall we never entered */
2188			IOCTL(tcp->pfd, PIOCRUN, 0);
2189			continue;
2190		}
2191#endif
2192
2193		/* clear the just started flag */
2194		tcp->flags &= ~TCB_STARTUP;
2195
2196		/* set current output file */
2197		outf = tcp->outf;
2198		curcol = tcp->curcol;
2199
2200		if (cflag) {
2201			struct timeval stime;
2202#ifdef FREEBSD
2203			char buf[1024];
2204			int len;
2205
2206			len = pread(tcp->pfd_status, buf, sizeof(buf) - 1, 0);
2207			if (len > 0) {
2208				buf[len] = '\0';
2209				sscanf(buf,
2210				       "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %*d,%*d %ld,%ld",
2211				       &stime.tv_sec, &stime.tv_usec);
2212			} else
2213				stime.tv_sec = stime.tv_usec = 0;
2214#else /* !FREEBSD */
2215			stime.tv_sec = tcp->status.pr_stime.tv_sec;
2216			stime.tv_usec = tcp->status.pr_stime.tv_nsec/1000;
2217#endif /* !FREEBSD */
2218			tv_sub(&tcp->dtime, &stime, &tcp->stime);
2219			tcp->stime = stime;
2220		}
2221		what = tcp->status.PR_WHAT;
2222		switch (tcp->status.PR_WHY) {
2223#ifndef FREEBSD
2224		case PR_REQUESTED:
2225			if (tcp->status.PR_FLAGS & PR_ASLEEP) {
2226				tcp->status.PR_WHY = PR_SYSENTRY;
2227				if (trace_syscall(tcp) < 0) {
2228					error_msg_and_die("syscall trouble");
2229				}
2230			}
2231			break;
2232#endif /* !FREEBSD */
2233		case PR_SYSENTRY:
2234#ifdef POLL_HACK
2235		        in_syscall = tcp;
2236#endif
2237		case PR_SYSEXIT:
2238			if (trace_syscall(tcp) < 0) {
2239				error_msg_and_die("syscall trouble");
2240			}
2241			break;
2242		case PR_SIGNALLED:
2243			if (cflag != CFLAG_ONLY_STATS
2244			    && (qual_flags[what] & QUAL_SIGNAL)) {
2245				printleader(tcp);
2246				tprintf("--- %s (%s) ---",
2247					signame(what), strsignal(what));
2248				printtrailer();
2249#ifdef PR_INFO
2250				if (tcp->status.PR_INFO.si_signo == what) {
2251					printleader(tcp);
2252					tprintf("    siginfo=");
2253					printsiginfo(&tcp->status.PR_INFO, 1);
2254					printtrailer();
2255				}
2256#endif
2257			}
2258			break;
2259		case PR_FAULTED:
2260			if (cflag != CFLAGS_ONLY_STATS
2261			    && (qual_flags[what] & QUAL_FAULT)) {
2262				printleader(tcp);
2263				tprintf("=== FAULT %d ===", what);
2264				printtrailer();
2265			}
2266			break;
2267#ifdef FREEBSD
2268		case 0: /* handle case we polled for nothing */
2269			continue;
2270#endif
2271		default:
2272			error_msg_and_die("odd stop %d", tcp->status.PR_WHY);
2273			break;
2274		}
2275		/* Remember current print column before continuing. */
2276		tcp->curcol = curcol;
2277		arg = 0;
2278#ifndef FREEBSD
2279		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
2280#else
2281		if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
2282#endif
2283		{
2284			perror_msg_and_die("PIOCRUN");
2285		}
2286	}
2287	return 0;
2288}
2289
2290#else /* !USE_PROCFS */
2291
2292static int
2293trace()
2294{
2295	int pid;
2296	int wait_errno;
2297	int status;
2298	struct tcb *tcp;
2299#ifdef LINUX
2300	struct rusage ru;
2301	struct rusage *rup = cflag ? &ru : NULL;
2302# ifdef __WALL
2303	static int wait4_options = __WALL;
2304# endif
2305#endif /* LINUX */
2306
2307	while (nprocs != 0) {
2308		if (interrupted)
2309			return 0;
2310		if (interactive)
2311			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2312#ifdef LINUX
2313# ifdef __WALL
2314		pid = wait4(-1, &status, wait4_options, rup);
2315		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
2316			/* this kernel does not support __WALL */
2317			wait4_options &= ~__WALL;
2318			pid = wait4(-1, &status, wait4_options, rup);
2319		}
2320		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
2321			/* most likely a "cloned" process */
2322			pid = wait4(-1, &status, __WCLONE, rup);
2323			if (pid < 0) {
2324				perror_msg("wait4(__WCLONE) failed");
2325			}
2326		}
2327# else
2328		pid = wait4(-1, &status, 0, rup);
2329# endif /* __WALL */
2330#endif /* LINUX */
2331#ifdef SUNOS4
2332		pid = wait(&status);
2333#endif
2334		wait_errno = errno;
2335		if (interactive)
2336			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2337
2338		if (pid < 0) {
2339			switch (wait_errno) {
2340			case EINTR:
2341				continue;
2342			case ECHILD:
2343				/*
2344				 * We would like to verify this case
2345				 * but sometimes a race in Solbourne's
2346				 * version of SunOS sometimes reports
2347				 * ECHILD before sending us SIGCHILD.
2348				 */
2349				return 0;
2350			default:
2351				errno = wait_errno;
2352				perror("strace: wait");
2353				return -1;
2354			}
2355		}
2356		if (pid == popen_pid) {
2357			if (WIFEXITED(status) || WIFSIGNALED(status))
2358				popen_pid = 0;
2359			continue;
2360		}
2361		if (debug) {
2362			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
2363#ifdef LINUX
2364			unsigned ev = (unsigned)status >> 16;
2365			if (ev) {
2366				static const char *const event_names[] = {
2367					[PTRACE_EVENT_CLONE] = "CLONE",
2368					[PTRACE_EVENT_FORK]  = "FORK",
2369					[PTRACE_EVENT_VFORK] = "VFORK",
2370					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
2371					[PTRACE_EVENT_EXEC]  = "EXEC",
2372					[PTRACE_EVENT_EXIT]  = "EXIT",
2373				};
2374				const char *e;
2375				if (ev < ARRAY_SIZE(event_names))
2376					e = event_names[ev];
2377				else {
2378					sprintf(buf, "?? (%u)", ev);
2379					e = buf;
2380				}
2381				fprintf(stderr, " PTRACE_EVENT_%s", e);
2382			}
2383#endif
2384			strcpy(buf, "???");
2385			if (WIFSIGNALED(status))
2386#ifdef WCOREDUMP
2387				sprintf(buf, "WIFSIGNALED,%ssig=%s",
2388						WCOREDUMP(status) ? "core," : "",
2389						signame(WTERMSIG(status)));
2390#else
2391				sprintf(buf, "WIFSIGNALED,sig=%s",
2392						signame(WTERMSIG(status)));
2393#endif
2394			if (WIFEXITED(status))
2395				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
2396			if (WIFSTOPPED(status))
2397				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
2398#ifdef WIFCONTINUED
2399			if (WIFCONTINUED(status))
2400				strcpy(buf, "WIFCONTINUED");
2401#endif
2402			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
2403		}
2404
2405		/* Look up `pid' in our table. */
2406		tcp = pid2tcb(pid);
2407		if (tcp == NULL) {
2408#ifdef LINUX
2409			if (followfork) {
2410				/* This is needed to go with the CLONE_PTRACE
2411				   changes in process.c/util.c: we might see
2412				   the child's initial trap before we see the
2413				   parent return from the clone syscall.
2414				   Leave the child suspended until the parent
2415				   returns from its system call.  Only then
2416				   will we have the association of parent and
2417				   child so that we know how to do clearbpt
2418				   in the child.  */
2419				tcp = alloctcb(pid);
2420				tcp->flags |= TCB_ATTACHED;
2421				if (!qflag)
2422					fprintf(stderr, "Process %d attached\n",
2423						pid);
2424			}
2425			else
2426				/* This can happen if a clone call used
2427				   CLONE_PTRACE itself.  */
2428#endif
2429			{
2430				if (WIFSTOPPED(status))
2431					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
2432				error_msg_and_die("Unknown pid: %u", pid);
2433			}
2434		}
2435		/* set current output file */
2436		outf = tcp->outf;
2437		curcol = tcp->curcol;
2438#ifdef LINUX
2439		if (cflag) {
2440			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2441			tcp->stime = ru.ru_stime;
2442		}
2443#endif
2444
2445		if (WIFSIGNALED(status)) {
2446			if (pid == strace_child)
2447				exit_code = 0x100 | WTERMSIG(status);
2448			if (cflag != CFLAG_ONLY_STATS
2449			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2450				printleader(tcp);
2451#ifdef WCOREDUMP
2452				tprintf("+++ killed by %s %s+++",
2453					signame(WTERMSIG(status)),
2454					WCOREDUMP(status) ? "(core dumped) " : "");
2455#else
2456				tprintf("+++ killed by %s +++",
2457					signame(WTERMSIG(status)));
2458#endif
2459				printtrailer();
2460			}
2461			droptcb(tcp);
2462			continue;
2463		}
2464		if (WIFEXITED(status)) {
2465			if (pid == strace_child)
2466				exit_code = WEXITSTATUS(status);
2467			if (tcp == tcp_last) {
2468				if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
2469					tprintf(" <unfinished ... exit status %d>\n",
2470						WEXITSTATUS(status));
2471				tcp_last = NULL;
2472			}
2473			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
2474				printleader(tcp);
2475				tprintf("+++ exited with %d +++", WEXITSTATUS(status));
2476				printtrailer();
2477			}
2478			droptcb(tcp);
2479			continue;
2480		}
2481		if (!WIFSTOPPED(status)) {
2482			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2483			droptcb(tcp);
2484			continue;
2485		}
2486
2487		if (status >> 16) {
2488			/* Ptrace event (we ignore all of them for now) */
2489			goto tracing;
2490		}
2491
2492		/*
2493		 * Interestingly, the process may stop
2494		 * with STOPSIG equal to some other signal
2495		 * than SIGSTOP if we happend to attach
2496		 * just before the process takes a signal.
2497		 * A no-MMU vforked child won't send up a signal,
2498		 * so skip the first (lost) execve notification.
2499		 */
2500		if ((tcp->flags & TCB_STARTUP) &&
2501		    (WSTOPSIG(status) == SIGSTOP || strace_vforked)) {
2502			/*
2503			 * This flag is there to keep us in sync.
2504			 * Next time this process stops it should
2505			 * really be entering a system call.
2506			 */
2507			tcp->flags &= ~TCB_STARTUP;
2508			if (tcp->flags & TCB_BPTSET) {
2509				/*
2510				 * One example is a breakpoint inherited from
2511				 * parent through fork().
2512				 */
2513				if (clearbpt(tcp) < 0) /* Pretty fatal */ {
2514					droptcb(tcp);
2515					cleanup();
2516					return -1;
2517				}
2518			}
2519#ifdef LINUX
2520			if (ptrace_setoptions) {
2521				if (debug)
2522					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2523				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2524					if (errno != ESRCH) {
2525						/* Should never happen, really */
2526						perror_msg_and_die("PTRACE_SETOPTIONS");
2527					}
2528				}
2529			}
2530#endif
2531			goto tracing;
2532		}
2533
2534		if (WSTOPSIG(status) != syscall_trap_sig) {
2535			if (WSTOPSIG(status) == SIGSTOP &&
2536					(tcp->flags & TCB_SIGTRAPPED)) {
2537				/*
2538				 * Trapped attempt to block SIGTRAP
2539				 * Hope we are back in control now.
2540				 */
2541				tcp->flags &= ~(TCB_INSYSCALL | TCB_SIGTRAPPED);
2542				if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2543					cleanup();
2544					return -1;
2545				}
2546				continue;
2547			}
2548			if (cflag != CFLAG_ONLY_STATS
2549			    && (qual_flags[WSTOPSIG(status)] & QUAL_SIGNAL)) {
2550				siginfo_t si;
2551#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2552				long pc = 0;
2553				long psr = 0;
2554
2555				upeek(tcp, PT_CR_IPSR, &psr);
2556				upeek(tcp, PT_CR_IIP, &pc);
2557
2558# define PSR_RI	41
2559				pc += (psr >> PSR_RI) & 0x3;
2560# define PC_FORMAT_STR	" @ %lx"
2561# define PC_FORMAT_ARG	, pc
2562#else
2563# define PC_FORMAT_STR	""
2564# define PC_FORMAT_ARG	/* nothing */
2565#endif
2566				printleader(tcp);
2567				if (ptrace(PTRACE_GETSIGINFO, pid, 0, &si) == 0) {
2568					tprintf("--- ");
2569					printsiginfo(&si, verbose(tcp));
2570					tprintf(" (%s)" PC_FORMAT_STR " ---",
2571						strsignal(WSTOPSIG(status))
2572						PC_FORMAT_ARG);
2573				} else
2574					tprintf("--- %s by %s" PC_FORMAT_STR " ---",
2575						strsignal(WSTOPSIG(status)),
2576						signame(WSTOPSIG(status))
2577						PC_FORMAT_ARG);
2578				printtrailer();
2579			}
2580			if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
2581				cleanup();
2582				return -1;
2583			}
2584			continue;
2585		}
2586
2587		/* We handled quick cases, we are permitted to interrupt now. */
2588		if (interrupted)
2589			return 0;
2590
2591		/* This should be syscall entry or exit.
2592		 * (Or it still can be that pesky post-execve SIGTRAP!)
2593		 * Handle it.
2594		 */
2595		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2596			/* ptrace() failed in trace_syscall() with ESRCH.
2597			 * Likely a result of process disappearing mid-flight.
2598			 * Observed case: exit_group() terminating
2599			 * all processes in thread group. In this case, threads
2600			 * "disappear" in an unpredictable moment without any
2601			 * notification to strace via wait().
2602			 */
2603			if (tcp->flags & TCB_ATTACHED) {
2604				if (tcp_last) {
2605					/* Do we have dangling line "syscall(param, param"?
2606					 * Finish the line then.
2607					 */
2608					tcp_last->flags |= TCB_REPRINT;
2609					tprintf(" <unfinished ...>");
2610					printtrailer();
2611				}
2612				detach(tcp, 0);
2613			} else {
2614				ptrace(PTRACE_KILL,
2615					tcp->pid, (char *) 1, SIGTERM);
2616				droptcb(tcp);
2617			}
2618			continue;
2619		}
2620	tracing:
2621		/* Remember current print column before continuing. */
2622		tcp->curcol = curcol;
2623		if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2624			cleanup();
2625			return -1;
2626		}
2627	}
2628	return 0;
2629}
2630
2631#endif /* !USE_PROCFS */
2632
2633void
2634tprintf(const char *fmt, ...)
2635{
2636	va_list args;
2637
2638	va_start(args, fmt);
2639	if (outf) {
2640		int n = vfprintf(outf, fmt, args);
2641		if (n < 0) {
2642			if (outf != stderr)
2643				perror(outfname == NULL
2644				       ? "<writing to pipe>" : outfname);
2645		} else
2646			curcol += n;
2647	}
2648	va_end(args);
2649}
2650
2651void
2652tprints(const char *str)
2653{
2654	if (outf) {
2655		int n = fputs(str, outf);
2656		if (n >= 0) {
2657			curcol += strlen(str);
2658			return;
2659		}
2660		if (outf != stderr)
2661			perror(outfname == NULL
2662			       ? "<writing to pipe>" : outfname);
2663	}
2664}
2665
2666void
2667printleader(struct tcb *tcp)
2668{
2669	if (tcp_last) {
2670		if (tcp_last->ptrace_errno) {
2671			if (tcp_last->flags & TCB_INSYSCALL) {
2672				tprintf(" <unavailable>) ");
2673				tabto();
2674			}
2675			tprintf("= ? <unavailable>\n");
2676			tcp_last->ptrace_errno = 0;
2677		} else if (!outfname || followfork < 2 || tcp_last == tcp) {
2678			tcp_last->flags |= TCB_REPRINT;
2679			tprintf(" <unfinished ...>\n");
2680		}
2681	}
2682	curcol = 0;
2683	if ((followfork == 1 || pflag_seen > 1) && outfname)
2684		tprintf("%-5d ", tcp->pid);
2685	else if (nprocs > 1 && !outfname)
2686		tprintf("[pid %5u] ", tcp->pid);
2687	if (tflag) {
2688		char str[sizeof("HH:MM:SS")];
2689		struct timeval tv, dtv;
2690		static struct timeval otv;
2691
2692		gettimeofday(&tv, NULL);
2693		if (rflag) {
2694			if (otv.tv_sec == 0)
2695				otv = tv;
2696			tv_sub(&dtv, &tv, &otv);
2697			tprintf("%6ld.%06ld ",
2698				(long) dtv.tv_sec, (long) dtv.tv_usec);
2699			otv = tv;
2700		}
2701		else if (tflag > 2) {
2702			tprintf("%ld.%06ld ",
2703				(long) tv.tv_sec, (long) tv.tv_usec);
2704		}
2705		else {
2706			time_t local = tv.tv_sec;
2707			strftime(str, sizeof(str), "%T", localtime(&local));
2708			if (tflag > 1)
2709				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
2710			else
2711				tprintf("%s ", str);
2712		}
2713	}
2714	if (iflag)
2715		printcall(tcp);
2716}
2717
2718void
2719tabto(void)
2720{
2721	if (curcol < acolumn)
2722		tprints(acolumn_spaces + curcol);
2723}
2724
2725void
2726printtrailer(void)
2727{
2728	tprintf("\n");
2729	tcp_last = NULL;
2730}
2731
2732#ifdef HAVE_MP_PROCFS
2733
/*
 * Send a control request to `fd' as a single gathered write.
 *
 * The request consists of the `cmd' word, followed - only when `arg' is
 * non-NULL - by `size' bytes taken from `arg'.
 *
 * Returns whatever writev() returns (converted to int): the number of
 * bytes written, or -1 on error.
 */
int
mp_ioctl(int fd, int cmd, void *arg, int size)
{
	struct iovec vec[2];
	/* One vector for the command word, a second one for the payload. */
	const int nvec = arg ? 2 : 1;

	vec[0].iov_base = &cmd;
	vec[0].iov_len = sizeof(cmd);

	if (nvec == 2) {
		vec[1].iov_base = arg;
		vec[1].iov_len = size;
	}

	return writev(fd, vec, nvec);
}
2750
2751#endif
2752