strace.c revision 023b7700de942f59fcb4cd30903969d276a4d796
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <dirent.h>
48
49#ifdef LINUX
50# include <asm/unistd.h>
51# if defined __NR_tkill
52#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
53# else
   /* kill() may arbitrarily choose the target task of the process group
      while we later wait on that specific TID.  PID process waits become
      TID task specific waits for a process under ptrace(2).  */
57#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
58#  define my_tkill(tid, sig) kill((tid), (sig))
59# endif
60#endif
61
62#if defined(IA64) && defined(LINUX)
63# include <asm/ptrace_offsets.h>
64#endif
65
66#ifdef USE_PROCFS
67#include <poll.h>
68#endif
69
70#ifdef SVR4
71#include <sys/stropts.h>
72#ifdef HAVE_MP_PROCFS
73#ifdef HAVE_SYS_UIO_H
74#include <sys/uio.h>
75#endif
76#endif
77#endif
78extern char **environ;
79extern int optind;
80extern char *optarg;
81
82
/* debug: bumped by each -d option; followfork: bumped by each -f option. */
int debug = 0, followfork = 0;
/*
 * PTRACE_O_* option bits; the test_ptrace_setoptions_*() probes OR in
 * the options that were found to work.
 */
unsigned int ptrace_setoptions = 0;
/* Which WSTOPSIG(status) value marks syscall traps? */
static unsigned int syscall_trap_sig = SIGTRAP;
/* Counters for the -T, -x and -q options respectively. */
int dtime = 0, xflag = 0, qflag = 0;
/* Statistics mode selected by -c (CFLAG_ONLY_STATS) or -C (CFLAG_BOTH). */
cflag_t cflag = CFLAG_NONE;
/*
 * iflag: -i count.  interactive: set to 1 at the start of main() and
 * cleared when both -o and a command are given.  pflag_seen: number of
 * accepted -p options.  rflag: -r count.  tflag: bumped by -t and by -r.
 */
static int iflag = 0, interactive = 0, pflag_seen = 0, rflag = 0, tflag = 0;
/*
 * daemonized_tracer supports -D option.
 * With this option, strace forks twice.
 * Unlike normal case, with -D *grandparent* process exec's,
 * becoming a traced process. Child exits (this prevents traced process
 * from having children it doesn't expect to have), and grandchild
 * attaches to grandparent similarly to strace -p PID.
 * This allows for more transparent interaction in cases
 * when process and its parent are communicating via signals,
 * wait() etc. Without -D, strace process gets lodged in between,
 * disrupting parent<->child link.
 */
static bool daemonized_tracer = 0;

/* Sometimes we want to print only succeeding syscalls. */
int not_failing_only = 0;

/* Show path associated with fd arguments */
int show_fd_path = 0;

/* are we filtering traces based on paths? */
int tracing_paths = 0;

/* NOTE(review): exit_code is not referenced in this part of the file. */
static int exit_code = 0;
/* Return value of the fork() performed by startup_child(). */
static int strace_child = 0;
/*
 * Pid of the process acting as tracer; die() only runs cleanup()
 * when called from that process.
 */
static int strace_tracer_pid = 0;

/* Argument of the -u option, or NULL when -u was not given. */
static char *username = NULL;
/* Uid/gid the command is run with: the -u user's, else our real ids. */
static uid_t run_uid;
static gid_t run_gid;

/* Value of the -s option. */
int max_strlen = DEFAULT_STRLEN;
/* Value of the -a option. */
static int acolumn = DEFAULT_ACOLUMN;
/* NUL-terminated string of acolumn spaces, built in main(). */
static char *acolumn_spaces;
/* Argument of the -o option, or NULL when -o was not given. */
static char *outfname = NULL;
/* Trace output stream; main() initialises it to stderr. */
static FILE *outf;
/* presumably the current output column -- TODO confirm against its users */
static int curcol;
/* Table of tcb pointers; tcbtabsize is the number of slots in it. */
static struct tcb **tcbtab;
/* NOTE(review): nprocs presumably counts the slots in use -- confirm. */
static unsigned int nprocs, tcbtabsize;
/* argv[0], or "strace" when argv[0] is NULL; prefixes error messages. */
static const char *progname;

/* Forward declarations of functions defined further down in this file. */
static int detach(struct tcb *tcp);
static int trace(void);
static void cleanup(void);
static void interrupt(int sig);
/*
 * Signal masks used with sigprocmask(): blocked_set lists the signals
 * held off during critical sections, empty_set unblocks everything.
 */
static sigset_t empty_set, blocked_set;

/*
 * Checked by startup_attach() once signals are unblocked again;
 * presumably set by interrupt() -- confirm.
 */
#ifdef HAVE_SIG_ATOMIC_T
static volatile sig_atomic_t interrupted;
#else /* !HAVE_SIG_ATOMIC_T */
static volatile int interrupted;
#endif /* !HAVE_SIG_ATOMIC_T */
142
/* Declarations needed only by the procfs-based tracing back end. */
#ifdef USE_PROCFS

static struct tcb *pfd2tcb(int pfd);
/* Installed as a signal handler by main(). */
static void reaper(int sig);
static void rebuild_pollv(void);
static struct pollfd *pollv;

#ifndef HAVE_POLLABLE_PROCFS

/*
 * Helpers for systems whose procfs descriptors are not pollable.
 * NOTE(review): presumably a helper process reports events through
 * proc_poll_pipe -- confirm against proc_poller().
 */
static void proc_poll_open(void);
static void proc_poller(int pfd);

struct proc_pollfd {
	int fd;
	int revents;
	int pid;
};

static int poller_pid;
/* Both ends start out as -1, i.e. not a valid descriptor. */
static int proc_poll_pipe[2] = { -1, -1 };

#endif /* !HAVE_POLLABLE_PROCFS */

/* poll(2) event to use: POLLWRNORM with HAVE_MP_PROCFS, else POLLPRI. */
#ifdef HAVE_MP_PROCFS
#define POLLWANT	POLLWRNORM
#else
#define POLLWANT	POLLPRI
#endif
#endif /* USE_PROCFS */
172
173static void
174usage(FILE *ofp, int exitval)
175{
176	fprintf(ofp, "\
177usage: strace [-CdDffhiqrtttTvVxxy] [-a column] [-e expr] ... [-o file]\n\
178              [-p pid] ... [-s strsize] [-u username] [-E var=val] ...\n\
179              [-P path] [command [arg ...]]\n\
180   or: strace -c [-D] [-e expr] ... [-O overhead] [-S sortby] [-E var=val] ...\n\
181              [command [arg ...]]\n\
182-c -- count time, calls, and errors for each syscall and report summary\n\
183-C -- like -c but also print regular output while processes are running\n\
184-f -- follow forks, -ff -- with output into separate files\n\
185-F -- attempt to follow vforks, -h -- print help message\n\
186-i -- print instruction pointer at time of syscall\n\
187-q -- suppress messages about attaching, detaching, etc.\n\
188-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
189-T -- print time spent in each syscall, -V -- print version\n\
190-v -- verbose mode: print unabbreviated argv, stat, termio[s], etc. args\n\
191-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
192-y -- print paths associated with file descriptor arguments\n\
193-a column -- alignment COLUMN for printing syscall results (default %d)\n\
194-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
195   options: trace, abbrev, verbose, raw, signal, read, or write\n\
196-o file -- send trace output to FILE instead of stderr\n\
197-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
198-p pid -- trace process with process id PID, may be repeated\n\
199-D -- run tracer process as a detached grandchild, not as parent\n\
200-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
201-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
202-u username -- run command as username handling setuid and/or setgid\n\
203-E var=val -- put var=val in the environment for command\n\
204-E var -- remove var from the environment for command\n\
205-P path -- trace accesses to path\n\
206" /* this is broken, so don't document it
207-z -- print only succeeding syscalls\n\
208  */
209, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
210	exit(exitval);
211}
212
213static void die(void) __attribute__ ((noreturn));
214static void die(void)
215{
216	if (strace_tracer_pid == getpid()) {
217		cflag = 0;
218		cleanup();
219	}
220	exit(1);
221}
222
223static void verror_msg(int err_no, const char *fmt, va_list p)
224{
225	fflush(NULL);
226	fprintf(stderr, "%s: ", progname);
227	vfprintf(stderr, fmt, p);
228	if (err_no)
229		fprintf(stderr, ": %s\n", strerror(err_no));
230	else
231		putc('\n', stderr);
232	fflush(stderr);
233}
234
/* Print a printf-style diagnostic to stderr, without any errno text. */
void error_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(0, fmt, ap);
	va_end(ap);
}
242
/*
 * Print a printf-style diagnostic to stderr (no errno text),
 * then terminate via die(), which does not return.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	verror_msg(0, fmt, ap);
	die();
}
250
/*
 * Print a printf-style diagnostic to stderr, followed by the
 * description of the errno value current at the time of the call.
 */
void perror_msg(const char *fmt, ...)
{
	va_list ap;
	int err;

	va_start(ap, fmt);
	err = errno;
	verror_msg(err, fmt, ap);
	va_end(ap);
}
258
/*
 * Print a printf-style diagnostic plus the description of the current
 * errno to stderr, then terminate via die(), which does not return.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list ap;
	int err;

	va_start(ap, fmt);
	err = errno;
	verror_msg(err, fmt, ap);
	die();
}
266
/*
 * Report "Out of memory" and terminate.  If this function is entered
 * a second time (i.e. while already reporting), exit at once with
 * status 1 instead of trying to report again.
 */
void die_out_of_memory(void)
{
	static bool already_reporting = 0;

	if (!already_reporting) {
		already_reporting = 1;
		error_msg_and_die("Out of memory");
	}
	exit(1);
}
275
#ifdef SVR4
#ifdef MIPS
/*
 * Deliberately empty function.  startup_child() installs it as the
 * SIGINT handler in its USE_PROCFS/MIPS branch ("Kludge for SGI").
 */
void
foobar()
{
}
#endif /* MIPS */
#endif /* SVR4 */
284
/*
 * Glue for systems without a MMU that cannot provide fork().
 * strace_vforked is a compile-time constant: 0 when HAVE_FORK is
 * defined, 1 otherwise.  In the latter case fork() is redefined as
 * vfork() for the rest of this file.
 */
#ifdef HAVE_FORK
# define strace_vforked 0
#else
# define strace_vforked 1
# define fork()         vfork()
#endif
292
/*
 * Make sure FD_CLOEXEC is set on descriptor fd.
 * Dies if the current descriptor flags cannot be read.
 */
static void
set_cloexec_flag(int fd)
{
	int cur = fcntl(fd, F_GETFD);

	if (cur < 0) {
		/*
		 * F_GETFD fails only for a bad descriptor, which would be
		 * a bug in the caller; abort rather than return an error.
		 */
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);
	}

	/* Already set: nothing to do. */
	if ((cur & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, cur | FD_CLOEXEC); /* never fails */
}
314
/*
 * When strace is a setuid executable, the real and effective user ids
 * have to be exchanged before and after filesystem and process
 * management operations.  Nothing is done when both ids are equal,
 * nor on SVR4.  Dies if setreuid() fails.
 */
static void
swap_uid(void)
{
#ifndef SVR4
	int effective = geteuid();
	int real = getuid();

	if (effective == real)
		return;
	if (setreuid(effective, real) < 0)
		perror_msg_and_die("setreuid");
#endif
}
330
/* Use the large-file variant of fopen() where it is available. */
#if _LFS64_LARGEFILE
# define fopen_for_output fopen64
#else
# define fopen_for_output fopen
#endif

/*
 * Open path for writing with the uids swapped (see swap_uid()) and
 * mark the resulting descriptor close-on-exec.
 * Never returns NULL: dies if the file cannot be opened.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL) {
		perror_msg_and_die("Can't fopen '%s'", path);
	}
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
350
/* Pid of the shell started by strace_popen(); 0 until one is started. */
static int popen_pid = 0;

#ifndef _PATH_BSHELL
# define _PATH_BSHELL "/bin/sh"
#endif

/*
 * Start "sh -c command" with its standard input connected to a pipe
 * and return a stream writing into that pipe.
 *
 * Standard popen(3) is not usable here: the pid of the popen child has
 * to be told apart from the pids of traced processes, and popen(3)
 * does not export it.  The pid is stored in popen_pid instead.
 */
static FILE *
strace_popen(const char *command)
{
	int pipefd[2];
	FILE *stream;

	swap_uid();
	if (pipe(pipefd) < 0) {
		perror_msg_and_die("pipe");
	}

	/* The write end must not leak into exec'ed programs. */
	set_cloexec_flag(pipefd[1]); /* never fails */

	popen_pid = vfork();
	if (popen_pid == -1) {
		perror_msg_and_die("vfork");
	}

	if (popen_pid == 0) {
		/* child: make the read end of the pipe its stdin */
		close(pipefd[1]);
		if (pipefd[0] != 0) {
			if (dup2(pipefd[0], 0) != 0)
				perror_msg_and_die("dup2");
			close(pipefd[0]);
		}
		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
	}

	/* parent: keep only the write end */
	close(pipefd[0]);
	swap_uid();
	stream = fdopen(pipefd[1], "w");
	if (stream == NULL)
		die_out_of_memory();
	return stream;
}
398
399static void
400newoutf(struct tcb *tcp)
401{
402	if (outfname && followfork > 1) {
403		char name[520 + sizeof(int) * 3];
404		sprintf(name, "%.512s.%u", outfname, tcp->pid);
405		tcp->outf = strace_fopen(name);
406	}
407}
408
/*
 * Attach to every process in tcbtab[] that is marked TCB_ATTACHED but
 * does not have TCB_STARTUP set yet.
 *
 * With -D (daemonized_tracer) the function first forks: the parent
 * just pause()s until it is killed, the child carries on as the tracer.
 *
 * How a process is attached depends on the build:
 *  - USE_PROCFS: via proc_open();
 *  - Linux with -f and without -D: every thread listed in
 *    /proc/PID/task is PTRACE_ATTACHed, threads other than PID
 *    getting a tcb of their own;
 *  - otherwise: a single PTRACE_ATTACH on the pid.
 *
 * Entries that cannot be attached are dropped with droptcb().
 * In interactive mode the signals in blocked_set are blocked while
 * attaching and unblocked again before returning.
 */
static void
startup_attach(void)
{
	int tcbi;
	struct tcb *tcp;

	/*
	 * Block user interruptions as we would leave the traced
	 * process stopped (process state T) if we would terminate in
	 * between PTRACE_ATTACH and wait4() on SIGSTOP.
	 * We rely on cleanup() from this point on.
	 */
	if (interactive)
		sigprocmask(SIG_BLOCK, &blocked_set, NULL);

	if (daemonized_tracer) {
		pid_t pid = fork();
		if (pid < 0) {
			perror_msg_and_die("fork");
		}
		if (pid) { /* parent */
			/*
			 * Wait for grandchild to attach to straced process
			 * (grandparent). Grandchild SIGKILLs us after it attached.
			 * Grandparent's wait() is unblocked by our death,
			 * it proceeds to exec the straced program.
			 */
			pause();
			_exit(0); /* paranoia */
		}
		/* grandchild */
		/* We will be the tracer process. Remember our new pid: */
		strace_tracer_pid = getpid();
	}

	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
		tcp = tcbtab[tcbi];

		/* Is this a process we should attach to, but not yet attached? */
		if ((tcp->flags & (TCB_ATTACHED | TCB_STARTUP)) != TCB_ATTACHED)
			continue; /* no */

		/* Reinitialize the output since it may have changed */
		tcp->outf = outf;
		newoutf(tcp);

#ifdef USE_PROCFS
		if (proc_open(tcp, 1) < 0) {
			fprintf(stderr, "trouble opening proc file\n");
			droptcb(tcp);
			continue;
		}
#else /* !USE_PROCFS */
# ifdef LINUX
		if (followfork && !daemonized_tracer) {
			/* Room for the format string plus a decimal int. */
			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
			DIR *dir;

			sprintf(procdir, "/proc/%d/task", tcp->pid);
			dir = opendir(procdir);
			if (dir != NULL) {
				/* ntid: thread ids tried; nerr: failed attaches. */
				unsigned int ntid = 0, nerr = 0;
				struct dirent *de;

				while ((de = readdir(dir)) != NULL) {
					struct tcb *cur_tcp;
					int tid;

					if (de->d_fileno == 0)
						continue;
					/* Skip entries that are not positive numbers. */
					tid = atoi(de->d_name);
					if (tid <= 0)
						continue;
					++ntid;
					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0) {
						++nerr;
						if (debug)
							fprintf(stderr, "attach to pid %d failed\n", tid);
						continue;
					}
					if (debug)
						fprintf(stderr, "attach to pid %d succeeded\n", tid);
					/* The main thread reuses tcp, others get a new tcb. */
					cur_tcp = tcp;
					if (tid != tcp->pid)
						cur_tcp = alloctcb(tid);
					cur_tcp->flags |= TCB_ATTACHED | TCB_STARTUP | TCB_IGNORE_ONE_SIGSTOP;
				}
				closedir(dir);
				if (interactive) {
					/*
					 * Briefly unblock signals so that a pending
					 * interruption gets noticed between processes.
					 */
					sigprocmask(SIG_SETMASK, &empty_set, NULL);
					if (interrupted)
						goto ret;
					sigprocmask(SIG_BLOCK, &blocked_set, NULL);
				}
				/* From here on ntid is the number of attached threads. */
				ntid -= nerr;
				if (ntid == 0) {
					perror("attach: ptrace(PTRACE_ATTACH, ...)");
					droptcb(tcp);
					continue;
				}
				if (!qflag) {
					fprintf(stderr, ntid > 1
? "Process %u attached with %u threads - interrupt to quit\n"
: "Process %u attached - interrupt to quit\n",
						tcp->pid, ntid);
				}
				if (!(tcp->flags & TCB_STARTUP)) {
					/* -p PID, we failed to attach to PID itself
					 * but did attach to some of its sibling threads.
					 * Drop PID's tcp.
					 */
					droptcb(tcp);
				}
				continue;
			} /* if (opendir worked) */
		} /* if (-f) */
# endif /* LINUX */
		/* Plain single-process attach. */
		if (ptrace(PTRACE_ATTACH, tcp->pid, (char *) 1, 0) < 0) {
			perror("attach: ptrace(PTRACE_ATTACH, ...)");
			droptcb(tcp);
			continue;
		}
		tcp->flags |= TCB_STARTUP | TCB_IGNORE_ONE_SIGSTOP;
		if (debug)
			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);

		if (daemonized_tracer) {
			/*
			 * It is our grandparent we trace, not a -p PID.
			 * Don't want to just detach on exit, so...
			 */
			tcp->flags &= ~TCB_ATTACHED;
			/*
			 * Make parent go away.
			 * Also makes grandparent's wait() unblock.
			 */
			kill(getppid(), SIGKILL);
		}

#endif /* !USE_PROCFS */
		if (!qflag)
			fprintf(stderr,
				"Process %u attached - interrupt to quit\n",
				tcp->pid);
	} /* for each tcbtab[] */

 ret:
	if (interactive)
		sigprocmask(SIG_SETMASK, &empty_set, NULL);
}
559
560static void
561startup_child(char **argv)
562{
563	struct stat statbuf;
564	const char *filename;
565	char pathname[MAXPATHLEN];
566	int pid = 0;
567	struct tcb *tcp;
568
569	filename = argv[0];
570	if (strchr(filename, '/')) {
571		if (strlen(filename) > sizeof pathname - 1) {
572			errno = ENAMETOOLONG;
573			perror_msg_and_die("exec");
574		}
575		strcpy(pathname, filename);
576	}
577#ifdef USE_DEBUGGING_EXEC
578	/*
579	 * Debuggers customarily check the current directory
580	 * first regardless of the path but doing that gives
581	 * security geeks a panic attack.
582	 */
583	else if (stat(filename, &statbuf) == 0)
584		strcpy(pathname, filename);
585#endif /* USE_DEBUGGING_EXEC */
586	else {
587		const char *path;
588		int m, n, len;
589
590		for (path = getenv("PATH"); path && *path; path += m) {
591			if (strchr(path, ':')) {
592				n = strchr(path, ':') - path;
593				m = n + 1;
594			}
595			else
596				m = n = strlen(path);
597			if (n == 0) {
598				if (!getcwd(pathname, MAXPATHLEN))
599					continue;
600				len = strlen(pathname);
601			}
602			else if (n > sizeof pathname - 1)
603				continue;
604			else {
605				strncpy(pathname, path, n);
606				len = n;
607			}
608			if (len && pathname[len - 1] != '/')
609				pathname[len++] = '/';
610			strcpy(pathname + len, filename);
611			if (stat(pathname, &statbuf) == 0 &&
612			    /* Accept only regular files
613			       with some execute bits set.
614			       XXX not perfect, might still fail */
615			    S_ISREG(statbuf.st_mode) &&
616			    (statbuf.st_mode & 0111))
617				break;
618		}
619	}
620	if (stat(pathname, &statbuf) < 0) {
621		perror_msg_and_die("Can't stat '%s'", filename);
622	}
623	strace_child = pid = fork();
624	if (pid < 0) {
625		perror_msg_and_die("fork");
626	}
627	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
628	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
629	) {
630		pid = getpid();
631		if (outf != stderr)
632			close(fileno(outf));
633#ifdef USE_PROCFS
634# ifdef MIPS
635		/* Kludge for SGI, see proc_open for details. */
636		sa.sa_handler = foobar;
637		sa.sa_flags = 0;
638		sigemptyset(&sa.sa_mask);
639		sigaction(SIGINT, &sa, NULL);
640# endif
641# ifndef FREEBSD
642		pause();
643# else
644		kill(pid, SIGSTOP);
645# endif
646#else /* !USE_PROCFS */
647		if (!daemonized_tracer) {
648			if (ptrace(PTRACE_TRACEME, 0, (char *) 1, 0) < 0) {
649				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
650			}
651			if (debug)
652				kill(pid, SIGSTOP);
653		}
654
655		if (username != NULL) {
656			uid_t run_euid = run_uid;
657			gid_t run_egid = run_gid;
658
659			if (statbuf.st_mode & S_ISUID)
660				run_euid = statbuf.st_uid;
661			if (statbuf.st_mode & S_ISGID)
662				run_egid = statbuf.st_gid;
663			/*
664			 * It is important to set groups before we
665			 * lose privileges on setuid.
666			 */
667			if (initgroups(username, run_gid) < 0) {
668				perror_msg_and_die("initgroups");
669			}
670			if (setregid(run_gid, run_egid) < 0) {
671				perror_msg_and_die("setregid");
672			}
673			if (setreuid(run_uid, run_euid) < 0) {
674				perror_msg_and_die("setreuid");
675			}
676		}
677		else if (geteuid() != 0)
678			setreuid(run_uid, run_uid);
679
680		if (!daemonized_tracer) {
681			/*
682			 * Induce a ptrace stop. Tracer (our parent)
683			 * will resume us with PTRACE_SYSCALL and display
684			 * the immediately following execve syscall.
685			 * Can't do this on NOMMU systems, we are after
686			 * vfork: parent is blocked, stopping would deadlock.
687			 */
688			if (!strace_vforked)
689				kill(pid, SIGSTOP);
690		} else {
691			struct sigaction sv_sigchld;
692			sigaction(SIGCHLD, NULL, &sv_sigchld);
693			/*
694			 * Make sure it is not SIG_IGN, otherwise wait
695			 * will not block.
696			 */
697			signal(SIGCHLD, SIG_DFL);
698			/*
699			 * Wait for grandchild to attach to us.
700			 * It kills child after that, and wait() unblocks.
701			 */
702			alarm(3);
703			wait(NULL);
704			alarm(0);
705			sigaction(SIGCHLD, &sv_sigchld, NULL);
706		}
707#endif /* !USE_PROCFS */
708
709		execv(pathname, argv);
710		perror_msg_and_die("exec");
711	}
712
713	/* We are the tracer */
714
715	if (!daemonized_tracer) {
716		tcp = alloctcb(pid);
717		if (!strace_vforked)
718			tcp->flags |= TCB_STARTUP | TCB_IGNORE_ONE_SIGSTOP;
719		else
720			tcp->flags |= TCB_STARTUP;
721	}
722	else {
723		/* With -D, *we* are child here, IOW: different pid. Fetch it: */
724		strace_tracer_pid = getpid();
725		/* The tracee is our parent: */
726		pid = getppid();
727		tcp = alloctcb(pid);
728		/* We want subsequent startup_attach() to attach to it: */
729		tcp->flags |= TCB_ATTACHED;
730	}
731#ifdef USE_PROCFS
732	if (proc_open(tcp, 0) < 0) {
733		perror_msg_and_die("trouble opening proc file");
734	}
735#endif
736}
737
738#ifdef LINUX
/*
 * Send signal sig to pid, ignoring the result, and leave errno exactly
 * as it was before the call, so that a following perror-style report
 * still describes the earlier failure.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);

	errno = errno_on_entry;
}
746
747/*
748 * Test whether the kernel support PTRACE_O_TRACECLONE et al options.
749 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
750 * and then see which options are supported by the kernel.
751 */
752static void
753test_ptrace_setoptions_followfork(void)
754{
755	int pid, expected_grandchild = 0, found_grandchild = 0;
756	const unsigned int test_options = PTRACE_O_TRACECLONE |
757					  PTRACE_O_TRACEFORK |
758					  PTRACE_O_TRACEVFORK;
759
760	pid = fork();
761	if (pid < 0)
762		perror_msg_and_die("fork");
763	if (pid == 0) {
764		pid = getpid();
765		if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
766			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
767					   __func__);
768		kill(pid, SIGSTOP);
769		if (fork() < 0)
770			perror_msg_and_die("fork");
771		_exit(0);
772	}
773
774	while (1) {
775		int status, tracee_pid;
776
777		errno = 0;
778		tracee_pid = wait(&status);
779		if (tracee_pid <= 0) {
780			if (errno == EINTR)
781				continue;
782			else if (errno == ECHILD)
783				break;
784			kill_save_errno(pid, SIGKILL);
785			perror_msg_and_die("%s: unexpected wait result %d",
786					   __func__, tracee_pid);
787		}
788		if (WIFEXITED(status)) {
789			if (WEXITSTATUS(status)) {
790				if (tracee_pid != pid)
791					kill_save_errno(pid, SIGKILL);
792				error_msg_and_die("%s: unexpected exit status %u",
793						  __func__, WEXITSTATUS(status));
794			}
795			continue;
796		}
797		if (WIFSIGNALED(status)) {
798			if (tracee_pid != pid)
799				kill_save_errno(pid, SIGKILL);
800			error_msg_and_die("%s: unexpected signal %u",
801					  __func__, WTERMSIG(status));
802		}
803		if (!WIFSTOPPED(status)) {
804			if (tracee_pid != pid)
805				kill_save_errno(tracee_pid, SIGKILL);
806			kill(pid, SIGKILL);
807			error_msg_and_die("%s: unexpected wait status %x",
808					  __func__, status);
809		}
810		if (tracee_pid != pid) {
811			found_grandchild = tracee_pid;
812			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
813				kill_save_errno(tracee_pid, SIGKILL);
814				kill_save_errno(pid, SIGKILL);
815				perror_msg_and_die("PTRACE_CONT doesn't work");
816			}
817			continue;
818		}
819		switch (WSTOPSIG(status)) {
820		case SIGSTOP:
821			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
822			    && errno != EINVAL && errno != EIO)
823				perror_msg("PTRACE_SETOPTIONS");
824			break;
825		case SIGTRAP:
826			if (status >> 16 == PTRACE_EVENT_FORK) {
827				long msg = 0;
828
829				if (ptrace(PTRACE_GETEVENTMSG, pid,
830					   NULL, (long) &msg) == 0)
831					expected_grandchild = msg;
832			}
833			break;
834		}
835		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
836			kill_save_errno(pid, SIGKILL);
837			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
838		}
839	}
840	if (expected_grandchild && expected_grandchild == found_grandchild) {
841		ptrace_setoptions |= test_options;
842		if (debug)
843			fprintf(stderr, "ptrace_setoptions = %#x\n",
844				ptrace_setoptions);
845		return;
846	}
847	error_msg("Test for PTRACE_O_TRACECLONE failed, "
848		  "giving up using this feature.");
849}
850
851/*
852 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
853 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
854 * and then see whether it will stop with (SIGTRAP | 0x80).
855 *
856 * Use of this option enables correct handling of user-generated SIGTRAPs,
857 * and SIGTRAPs generated by special instructions such as int3 on x86:
858 * _start:	.globl	_start
859 *		int3
860 *		movl	$42, %ebx
861 *		movl	$1, %eax
862 *		int	$0x80
863 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
864 */
865static void
866test_ptrace_setoptions_for_all(void)
867{
868	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
869					  PTRACE_O_TRACEEXEC;
870	int pid;
871	int it_worked = 0;
872
873	pid = fork();
874	if (pid < 0)
875		perror_msg_and_die("fork");
876
877	if (pid == 0) {
878		pid = getpid();
879		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
880			/* Note: exits with exitcode 1 */
881			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
882					   __func__);
883		kill(pid, SIGSTOP);
884		_exit(0); /* parent should see entry into this syscall */
885	}
886
887	while (1) {
888		int status, tracee_pid;
889
890		errno = 0;
891		tracee_pid = wait(&status);
892		if (tracee_pid <= 0) {
893			if (errno == EINTR)
894				continue;
895			kill_save_errno(pid, SIGKILL);
896			perror_msg_and_die("%s: unexpected wait result %d",
897					   __func__, tracee_pid);
898		}
899		if (WIFEXITED(status)) {
900			if (WEXITSTATUS(status) == 0)
901				break;
902			error_msg_and_die("%s: unexpected exit status %u",
903					  __func__, WEXITSTATUS(status));
904		}
905		if (WIFSIGNALED(status)) {
906			error_msg_and_die("%s: unexpected signal %u",
907					  __func__, WTERMSIG(status));
908		}
909		if (!WIFSTOPPED(status)) {
910			kill(pid, SIGKILL);
911			error_msg_and_die("%s: unexpected wait status %x",
912					  __func__, status);
913		}
914		if (WSTOPSIG(status) == SIGSTOP) {
915			/*
916			 * We don't check "options aren't accepted" error.
917			 * If it happens, we'll never get (SIGTRAP | 0x80),
918			 * and thus will decide to not use the option.
919			 * IOW: the outcome of the test will be correct.
920			 */
921			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
922			    && errno != EINVAL && errno != EIO)
923				perror_msg("PTRACE_SETOPTIONS");
924		}
925		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
926			it_worked = 1;
927		}
928		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
929			kill_save_errno(pid, SIGKILL);
930			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
931		}
932	}
933
934	if (it_worked) {
935		syscall_trap_sig = (SIGTRAP | 0x80);
936		ptrace_setoptions |= test_options;
937		if (debug)
938			fprintf(stderr, "ptrace_setoptions = %#x\n",
939				ptrace_setoptions);
940		return;
941	}
942
943	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
944		  "giving up using this feature.");
945}
946#endif
947
948int
949main(int argc, char *argv[])
950{
951	struct tcb *tcp;
952	int c, pid = 0;
953	int optF = 0;
954	struct sigaction sa;
955
956	progname = argv[0] ? argv[0] : "strace";
957
958	strace_tracer_pid = getpid();
959
960	/* Allocate the initial tcbtab.  */
961	tcbtabsize = argc;	/* Surely enough for all -p args.  */
962	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
963	if (!tcbtab)
964		die_out_of_memory();
965	tcp = calloc(tcbtabsize, sizeof(*tcp));
966	if (!tcp)
967		die_out_of_memory();
968	for (c = 0; c < tcbtabsize; c++)
969		tcbtab[c] = tcp++;
970
971	outf = stderr;
972	interactive = 1;
973	set_sortby(DEFAULT_SORTBY);
974	set_personality(DEFAULT_PERSONALITY);
975	qualify("trace=all");
976	qualify("abbrev=all");
977	qualify("verbose=all");
978	qualify("signal=all");
979	while ((c = getopt(argc, argv,
980		"+cCdfFhiqrtTvVxyz"
981#ifndef USE_PROCFS
982		"D"
983#endif
984		"a:e:o:O:p:s:S:u:E:P:")) != EOF) {
985		switch (c) {
986		case 'c':
987			if (cflag == CFLAG_BOTH) {
988				error_msg_and_die("-c and -C are mutually exclusive options");
989			}
990			cflag = CFLAG_ONLY_STATS;
991			break;
992		case 'C':
993			if (cflag == CFLAG_ONLY_STATS) {
994				error_msg_and_die("-c and -C are mutually exclusive options");
995			}
996			cflag = CFLAG_BOTH;
997			break;
998		case 'd':
999			debug++;
1000			break;
1001#ifndef USE_PROCFS
1002		case 'D':
1003			daemonized_tracer = 1;
1004			break;
1005#endif
1006		case 'F':
1007			optF = 1;
1008			break;
1009		case 'f':
1010			followfork++;
1011			break;
1012		case 'h':
1013			usage(stdout, 0);
1014			break;
1015		case 'i':
1016			iflag++;
1017			break;
1018		case 'q':
1019			qflag++;
1020			break;
1021		case 'r':
1022			rflag++;
1023			tflag++;
1024			break;
1025		case 't':
1026			tflag++;
1027			break;
1028		case 'T':
1029			dtime++;
1030			break;
1031		case 'x':
1032			xflag++;
1033			break;
1034		case 'y':
1035			show_fd_path = 1;
1036			break;
1037		case 'v':
1038			qualify("abbrev=none");
1039			break;
1040		case 'V':
1041			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1042			exit(0);
1043			break;
1044		case 'z':
1045			not_failing_only = 1;
1046			break;
1047		case 'a':
1048			acolumn = atoi(optarg);
1049			if (acolumn < 0)
1050				error_msg_and_die("Bad column width '%s'", optarg);
1051			break;
1052		case 'e':
1053			qualify(optarg);
1054			break;
1055		case 'o':
1056			outfname = strdup(optarg);
1057			break;
1058		case 'O':
1059			set_overhead(atoi(optarg));
1060			break;
1061		case 'p':
1062			pid = atoi(optarg);
1063			if (pid <= 0) {
1064				error_msg("Invalid process id: '%s'", optarg);
1065				break;
1066			}
1067			if (pid == strace_tracer_pid) {
1068				error_msg("I'm sorry, I can't let you do that, Dave.");
1069				break;
1070			}
1071			tcp = alloc_tcb(pid, 0);
1072			tcp->flags |= TCB_ATTACHED;
1073			pflag_seen++;
1074			break;
1075		case 'P':
1076			tracing_paths = 1;
1077			if (pathtrace_select(optarg)) {
1078				error_msg_and_die("Failed to select path '%s'", optarg);
1079			}
1080			break;
1081		case 's':
1082			max_strlen = atoi(optarg);
1083			if (max_strlen < 0) {
1084				error_msg_and_die("Invalid -s argument: '%s'", optarg);
1085			}
1086			break;
1087		case 'S':
1088			set_sortby(optarg);
1089			break;
1090		case 'u':
1091			username = strdup(optarg);
1092			break;
1093		case 'E':
1094			if (putenv(optarg) < 0)
1095				die_out_of_memory();
1096			break;
1097		default:
1098			usage(stderr, 1);
1099			break;
1100		}
1101	}
1102
1103	acolumn_spaces = malloc(acolumn + 1);
1104	if (!acolumn_spaces)
1105		die_out_of_memory();
1106	memset(acolumn_spaces, ' ', acolumn);
1107	acolumn_spaces[acolumn] = '\0';
1108
1109	if ((optind == argc) == !pflag_seen)
1110		usage(stderr, 1);
1111
1112	if (pflag_seen && daemonized_tracer) {
1113		error_msg_and_die("-D and -p are mutually exclusive options");
1114	}
1115
1116	if (!followfork)
1117		followfork = optF;
1118
1119	if (followfork > 1 && cflag) {
1120		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1121	}
1122
1123	/* See if they want to run as another user. */
1124	if (username != NULL) {
1125		struct passwd *pent;
1126
1127		if (getuid() != 0 || geteuid() != 0) {
1128			error_msg_and_die("You must be root to use the -u option");
1129		}
1130		pent = getpwnam(username);
1131		if (pent == NULL) {
1132			error_msg_and_die("Cannot find user '%s'", username);
1133		}
1134		run_uid = pent->pw_uid;
1135		run_gid = pent->pw_gid;
1136	}
1137	else {
1138		run_uid = getuid();
1139		run_gid = getgid();
1140	}
1141
1142#ifdef LINUX
1143	if (followfork)
1144		test_ptrace_setoptions_followfork();
1145	test_ptrace_setoptions_for_all();
1146#endif
1147
1148	/* Check if they want to redirect the output. */
1149	if (outfname) {
1150		/* See if they want to pipe the output. */
1151		if (outfname[0] == '|' || outfname[0] == '!') {
1152			/*
1153			 * We can't do the <outfname>.PID funny business
1154			 * when using popen, so prohibit it.
1155			 */
1156			if (followfork > 1)
1157				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1158			outf = strace_popen(outfname + 1);
1159		}
1160		else if (followfork <= 1)
1161			outf = strace_fopen(outfname);
1162	}
1163
1164	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1165		static char buf[BUFSIZ];
1166		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1167	}
1168	if (outfname && optind < argc) {
1169		interactive = 0;
1170		qflag = 1;
1171	}
1172
1173	/* Valid states here:
1174	   optind < argc	pflag_seen	outfname	interactive
1175	   1			0		0		1
1176	   0			1		0		1
1177	   1			0		1		0
1178	   0			1		1		1
1179	 */
1180
1181	/* STARTUP_CHILD must be called before the signal handlers get
1182	   installed below as they are inherited into the spawned process.
1183	   Also we do not need to be protected by them as during interruption
1184	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1185	if (!pflag_seen)
1186		startup_child(&argv[optind]);
1187
1188	sigemptyset(&empty_set);
1189	sigemptyset(&blocked_set);
1190	sa.sa_handler = SIG_IGN;
1191	sigemptyset(&sa.sa_mask);
1192	sa.sa_flags = 0;
1193	sigaction(SIGTTOU, &sa, NULL);
1194	sigaction(SIGTTIN, &sa, NULL);
1195	if (interactive) {
1196		sigaddset(&blocked_set, SIGHUP);
1197		sigaddset(&blocked_set, SIGINT);
1198		sigaddset(&blocked_set, SIGQUIT);
1199		sigaddset(&blocked_set, SIGPIPE);
1200		sigaddset(&blocked_set, SIGTERM);
1201		sa.sa_handler = interrupt;
1202#ifdef SUNOS4
1203		/* POSIX signals on sunos4.1 are a little broken. */
1204		sa.sa_flags = SA_INTERRUPT;
1205#endif /* SUNOS4 */
1206	}
1207	sigaction(SIGHUP, &sa, NULL);
1208	sigaction(SIGINT, &sa, NULL);
1209	sigaction(SIGQUIT, &sa, NULL);
1210	sigaction(SIGPIPE, &sa, NULL);
1211	sigaction(SIGTERM, &sa, NULL);
1212#ifdef USE_PROCFS
1213	sa.sa_handler = reaper;
1214	sigaction(SIGCHLD, &sa, NULL);
1215#else
1216	/* Make sure SIGCHLD has the default action so that waitpid
1217	   definitely works without losing track of children.  The user
1218	   should not have given us a bogus state to inherit, but he might
1219	   have.  Arguably we should detect SIG_IGN here and pass it on
1220	   to children, but probably noone really needs that.  */
1221	sa.sa_handler = SIG_DFL;
1222	sigaction(SIGCHLD, &sa, NULL);
1223#endif /* USE_PROCFS */
1224
1225	if (pflag_seen || daemonized_tracer)
1226		startup_attach();
1227
1228	if (trace() < 0)
1229		exit(1);
1230	cleanup();
1231	fflush(NULL);
1232	if (exit_code > 0xff) {
1233		/* Child was killed by a signal, mimic that.  */
1234		exit_code &= 0xff;
1235		signal(exit_code, SIG_DFL);
1236		raise(exit_code);
1237		/* Paranoia - what if this signal is not fatal?
1238		   Exit with 128 + signo then.  */
1239		exit_code += 128;
1240	}
1241	exit(exit_code);
1242}
1243
1244static void
1245expand_tcbtab(void)
1246{
1247	/* Allocate some more TCBs and expand the table.
1248	   We don't want to relocate the TCBs because our
1249	   callers have pointers and it would be a pain.
1250	   So tcbtab is a table of pointers.  Since we never
1251	   free the TCBs, we allocate a single chunk of many.  */
1252	int i = tcbtabsize;
1253	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1254	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1255	if (!newtab || !newtcbs)
1256		die_out_of_memory();
1257	tcbtabsize *= 2;
1258	tcbtab = newtab;
1259	while (i < tcbtabsize)
1260		tcbtab[i++] = newtcbs++;
1261}
1262
1263struct tcb *
1264alloc_tcb(int pid, int command_options_parsed)
1265{
1266	int i;
1267	struct tcb *tcp;
1268
1269	if (nprocs == tcbtabsize)
1270		expand_tcbtab();
1271
1272	for (i = 0; i < tcbtabsize; i++) {
1273		tcp = tcbtab[i];
1274		if ((tcp->flags & TCB_INUSE) == 0) {
1275			memset(tcp, 0, sizeof(*tcp));
1276			tcp->pid = pid;
1277			tcp->flags = TCB_INUSE;
1278			tcp->outf = outf; /* Initialise to current out file */
1279#if SUPPORTED_PERSONALITIES > 1
1280			tcp->currpers = current_personality;
1281#endif
1282#ifdef USE_PROCFS
1283			tcp->pfd = -1;
1284#endif
1285			nprocs++;
1286			if (debug)
1287				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1288			if (command_options_parsed)
1289				newoutf(tcp);
1290			return tcp;
1291		}
1292	}
1293	error_msg_and_die("bug in alloc_tcb");
1294}
1295
1296#ifdef USE_PROCFS
1297int
1298proc_open(struct tcb *tcp, int attaching)
1299{
1300	char proc[32];
1301	long arg;
1302#ifdef SVR4
1303	int i;
1304	sysset_t syscalls;
1305	sigset_t signals;
1306	fltset_t faults;
1307#endif
1308#ifndef HAVE_POLLABLE_PROCFS
1309	static int last_pfd;
1310#endif
1311
1312#ifdef HAVE_MP_PROCFS
1313	/* Open the process pseudo-files in /proc. */
1314	sprintf(proc, "/proc/%d/ctl", tcp->pid);
1315	tcp->pfd = open(proc, O_WRONLY|O_EXCL);
1316	if (tcp->pfd < 0) {
1317		perror("strace: open(\"/proc/...\", ...)");
1318		return -1;
1319	}
1320	set_cloexec_flag(tcp->pfd);
1321	sprintf(proc, "/proc/%d/status", tcp->pid);
1322	tcp->pfd_stat = open(proc, O_RDONLY|O_EXCL);
1323	if (tcp->pfd_stat < 0) {
1324		perror("strace: open(\"/proc/...\", ...)");
1325		return -1;
1326	}
1327	set_cloexec_flag(tcp->pfd_stat);
1328	sprintf(proc, "/proc/%d/as", tcp->pid);
1329	tcp->pfd_as = open(proc, O_RDONLY|O_EXCL);
1330	if (tcp->pfd_as < 0) {
1331		perror("strace: open(\"/proc/...\", ...)");
1332		return -1;
1333	}
1334	set_cloexec_flag(tcp->pfd_as);
1335#else
1336	/* Open the process pseudo-file in /proc. */
1337# ifndef FREEBSD
1338	sprintf(proc, "/proc/%d", tcp->pid);
1339	tcp->pfd = open(proc, O_RDWR|O_EXCL);
1340# else
1341	sprintf(proc, "/proc/%d/mem", tcp->pid);
1342	tcp->pfd = open(proc, O_RDWR);
1343# endif
1344	if (tcp->pfd < 0) {
1345		perror("strace: open(\"/proc/...\", ...)");
1346		return -1;
1347	}
1348	set_cloexec_flag(tcp->pfd);
1349#endif
1350#ifdef FREEBSD
1351	sprintf(proc, "/proc/%d/regs", tcp->pid);
1352	tcp->pfd_reg = open(proc, O_RDONLY);
1353	if (tcp->pfd_reg < 0) {
1354		perror("strace: open(\"/proc/.../regs\", ...)");
1355		return -1;
1356	}
1357	if (cflag) {
1358		sprintf(proc, "/proc/%d/status", tcp->pid);
1359		tcp->pfd_status = open(proc, O_RDONLY);
1360		if (tcp->pfd_status < 0) {
1361			perror("strace: open(\"/proc/.../status\", ...)");
1362			return -1;
1363		}
1364	} else
1365		tcp->pfd_status = -1;
1366#endif /* FREEBSD */
1367	rebuild_pollv();
1368	if (!attaching) {
1369		/*
1370		 * Wait for the child to pause.  Because of a race
1371		 * condition we have to poll for the event.
1372		 */
1373		for (;;) {
1374			if (IOCTL_STATUS(tcp) < 0) {
1375				perror("strace: PIOCSTATUS");
1376				return -1;
1377			}
1378			if (tcp->status.PR_FLAGS & PR_ASLEEP)
1379				break;
1380		}
1381	}
1382#ifndef FREEBSD
1383	/* Stop the process so that we own the stop. */
1384	if (IOCTL(tcp->pfd, PIOCSTOP, (char *)NULL) < 0) {
1385		perror("strace: PIOCSTOP");
1386		return -1;
1387	}
1388#endif
1389#ifdef PIOCSET
1390	/* Set Run-on-Last-Close. */
1391	arg = PR_RLC;
1392	if (IOCTL(tcp->pfd, PIOCSET, &arg) < 0) {
1393		perror("PIOCSET PR_RLC");
1394		return -1;
1395	}
1396	/* Set or Reset Inherit-on-Fork. */
1397	arg = PR_FORK;
1398	if (IOCTL(tcp->pfd, followfork ? PIOCSET : PIOCRESET, &arg) < 0) {
1399		perror("PIOC{SET,RESET} PR_FORK");
1400		return -1;
1401	}
1402#else  /* !PIOCSET */
1403#ifndef FREEBSD
1404	if (ioctl(tcp->pfd, PIOCSRLC) < 0) {
1405		perror("PIOCSRLC");
1406		return -1;
1407	}
1408	if (ioctl(tcp->pfd, followfork ? PIOCSFORK : PIOCRFORK) < 0) {
1409		perror("PIOC{S,R}FORK");
1410		return -1;
1411	}
1412#else /* FREEBSD */
1413	/* just unset the PF_LINGER flag for the Run-on-Last-Close. */
1414	if (ioctl(tcp->pfd, PIOCGFL, &arg) < 0) {
1415	        perror("PIOCGFL");
1416		return -1;
1417	}
1418	arg &= ~PF_LINGER;
1419	if (ioctl(tcp->pfd, PIOCSFL, arg) < 0) {
1420		perror("PIOCSFL");
1421		return -1;
1422	}
1423#endif /* FREEBSD */
1424#endif /* !PIOCSET */
1425#ifndef FREEBSD
1426	/* Enable all syscall entries we care about. */
1427	premptyset(&syscalls);
1428	for (i = 1; i < MAX_QUALS; ++i) {
1429		if (i > (sizeof syscalls) * CHAR_BIT) break;
1430		if (qual_flags[i] & QUAL_TRACE) praddset(&syscalls, i);
1431	}
1432	praddset(&syscalls, SYS_execve);
1433	if (followfork) {
1434		praddset(&syscalls, SYS_fork);
1435#ifdef SYS_forkall
1436		praddset(&syscalls, SYS_forkall);
1437#endif
1438#ifdef SYS_fork1
1439		praddset(&syscalls, SYS_fork1);
1440#endif
1441#ifdef SYS_rfork1
1442		praddset(&syscalls, SYS_rfork1);
1443#endif
1444#ifdef SYS_rforkall
1445		praddset(&syscalls, SYS_rforkall);
1446#endif
1447	}
1448	if (IOCTL(tcp->pfd, PIOCSENTRY, &syscalls) < 0) {
1449		perror("PIOCSENTRY");
1450		return -1;
1451	}
1452	/* Enable the syscall exits. */
1453	if (IOCTL(tcp->pfd, PIOCSEXIT, &syscalls) < 0) {
1454		perror("PIOSEXIT");
1455		return -1;
1456	}
1457	/* Enable signals we care about. */
1458	premptyset(&signals);
1459	for (i = 1; i < MAX_QUALS; ++i) {
1460		if (i > (sizeof signals) * CHAR_BIT) break;
1461		if (qual_flags[i] & QUAL_SIGNAL) praddset(&signals, i);
1462	}
1463	if (IOCTL(tcp->pfd, PIOCSTRACE, &signals) < 0) {
1464		perror("PIOCSTRACE");
1465		return -1;
1466	}
1467	/* Enable faults we care about */
1468	premptyset(&faults);
1469	for (i = 1; i < MAX_QUALS; ++i) {
1470		if (i > (sizeof faults) * CHAR_BIT) break;
1471		if (qual_flags[i] & QUAL_FAULT) praddset(&faults, i);
1472	}
1473	if (IOCTL(tcp->pfd, PIOCSFAULT, &faults) < 0) {
1474		perror("PIOCSFAULT");
1475		return -1;
1476	}
1477#else /* FREEBSD */
1478	/* set events flags. */
1479	arg = S_SIG | S_SCE | S_SCX;
1480	if (ioctl(tcp->pfd, PIOCBIS, arg) < 0) {
1481		perror("PIOCBIS");
1482		return -1;
1483	}
1484#endif /* FREEBSD */
1485	if (!attaching) {
1486#ifdef MIPS
1487		/*
1488		 * The SGI PRSABORT doesn't work for pause() so
1489		 * we send it a caught signal to wake it up.
1490		 */
1491		kill(tcp->pid, SIGINT);
1492#else /* !MIPS */
1493#ifdef PRSABORT
1494		/* The child is in a pause(), abort it. */
1495		arg = PRSABORT;
1496		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1497			perror("PIOCRUN");
1498			return -1;
1499		}
1500#endif
1501#endif /* !MIPS*/
1502#ifdef FREEBSD
1503		/* wake up the child if it received the SIGSTOP */
1504		kill(tcp->pid, SIGCONT);
1505#endif
1506		for (;;) {
1507			/* Wait for the child to do something. */
1508			if (IOCTL_WSTOP(tcp) < 0) {
1509				perror("PIOCWSTOP");
1510				return -1;
1511			}
1512			if (tcp->status.PR_WHY == PR_SYSENTRY) {
1513				tcp->flags &= ~TCB_INSYSCALL;
1514				get_scno(tcp);
1515				if (known_scno(tcp) == SYS_execve)
1516					break;
1517			}
1518			/* Set it running: maybe execve will be next. */
1519#ifndef FREEBSD
1520			arg = 0;
1521			if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
1522#else
1523			if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
1524#endif
1525			{
1526				perror("PIOCRUN");
1527				return -1;
1528			}
1529#ifdef FREEBSD
1530			/* handle the case where we "opened" the child before
1531			   it did the kill -STOP */
1532			if (tcp->status.PR_WHY == PR_SIGNALLED &&
1533			    tcp->status.PR_WHAT == SIGSTOP)
1534			        kill(tcp->pid, SIGCONT);
1535#endif
1536		}
1537	}
1538#ifdef FREEBSD
1539	else {
1540		if (attaching < 2) {
1541			/* We are attaching to an already running process.
1542			 * Try to figure out the state of the process in syscalls,
1543			 * to handle the first event well.
1544			 * This is done by having a look at the "wchan" property of the
1545			 * process, which tells where it is stopped (if it is). */
1546			FILE * status;
1547			char wchan[20]; /* should be enough */
1548
1549			sprintf(proc, "/proc/%d/status", tcp->pid);
1550			status = fopen(proc, "r");
1551			if (status &&
1552			    (fscanf(status, "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d"
1553				    "%*d,%*d %*d,%*d %19s", wchan) == 1) &&
1554			    strcmp(wchan, "nochan") && strcmp(wchan, "spread") &&
1555			    strcmp(wchan, "stopevent")) {
1556				/* The process is asleep in the middle of a syscall.
1557				   Fake the syscall entry event */
1558				tcp->flags &= ~(TCB_INSYSCALL|TCB_STARTUP);
1559				tcp->status.PR_WHY = PR_SYSENTRY;
1560				trace_syscall(tcp);
1561			}
1562			if (status)
1563				fclose(status);
1564		} /* otherwise it's a fork being followed */
1565	}
1566#endif /* FREEBSD */
1567#ifndef HAVE_POLLABLE_PROCFS
1568	if (proc_poll_pipe[0] != -1)
1569		proc_poller(tcp->pfd);
1570	else if (nprocs > 1) {
1571		proc_poll_open();
1572		proc_poller(last_pfd);
1573		proc_poller(tcp->pfd);
1574	}
1575	last_pfd = tcp->pfd;
1576#endif /* !HAVE_POLLABLE_PROCFS */
1577	return 0;
1578}
1579
1580#endif /* USE_PROCFS */
1581
1582struct tcb *
1583pid2tcb(int pid)
1584{
1585	int i;
1586
1587	if (pid <= 0)
1588		return NULL;
1589
1590	for (i = 0; i < tcbtabsize; i++) {
1591		struct tcb *tcp = tcbtab[i];
1592		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1593			return tcp;
1594	}
1595
1596	return NULL;
1597}
1598
1599#ifdef USE_PROCFS
1600
1601static struct tcb *
1602first_used_tcb(void)
1603{
1604	int i;
1605	struct tcb *tcp;
1606	for (i = 0; i < tcbtabsize; i++) {
1607		tcp = tcbtab[i];
1608		if (tcp->flags & TCB_INUSE)
1609			return tcp;
1610	}
1611	return NULL;
1612}
1613
1614static struct tcb *
1615pfd2tcb(int pfd)
1616{
1617	int i;
1618
1619	for (i = 0; i < tcbtabsize; i++) {
1620		struct tcb *tcp = tcbtab[i];
1621		if (tcp->pfd != pfd)
1622			continue;
1623		if (tcp->flags & TCB_INUSE)
1624			return tcp;
1625	}
1626	return NULL;
1627}
1628
1629#endif /* USE_PROCFS */
1630
1631void
1632droptcb(struct tcb *tcp)
1633{
1634	if (tcp->pid == 0)
1635		return;
1636
1637	nprocs--;
1638	if (debug)
1639		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1640
1641#ifdef USE_PROCFS
1642	if (tcp->pfd != -1) {
1643		close(tcp->pfd);
1644		tcp->pfd = -1;
1645# ifdef FREEBSD
1646		if (tcp->pfd_reg != -1) {
1647		        close(tcp->pfd_reg);
1648		        tcp->pfd_reg = -1;
1649		}
1650		if (tcp->pfd_status != -1) {
1651			close(tcp->pfd_status);
1652			tcp->pfd_status = -1;
1653		}
1654# endif
1655		tcp->flags = 0; /* rebuild_pollv needs it */
1656		rebuild_pollv();
1657	}
1658#endif
1659
1660	if (outfname && followfork > 1 && tcp->outf)
1661		fclose(tcp->outf);
1662
1663	memset(tcp, 0, sizeof(*tcp));
1664}
1665
/*
 * Detach from the traced process described by `tcp' and drop its TCB.
 *
 * Never call detach() twice on the same process, as both unattached and
 * attached-unstopped processes give the same ESRCH.  For an unattached
 * process we would SIGSTOP it and wait for its SIGSTOP notification
 * forever.
 *
 * Returns the value left in `error': 0 unless one of the ptrace
 * requests whose result is stored in it failed.
 */

static int
detach(struct tcb *tcp)
{
	int error = 0;
#ifdef LINUX
	int status, catch_sigstop;
#endif

	/* Remove a pending breakpoint before letting the process go. */
	if (tcp->flags & TCB_BPTSET)
		clearbpt(tcp);

#ifdef LINUX
	/*
	 * Linux wrongly insists the child be stopped
	 * before detaching.  Arghh.  We go through hoops
	 * to make a clean break of things.
	 */
#if defined(SPARC)
#undef PTRACE_DETACH
#define PTRACE_DETACH PTRACE_SUNDETACH
#endif
	/*
	 * We did PTRACE_ATTACH but possibly didn't see the expected SIGSTOP.
	 * We must catch exactly one as otherwise the detached process
	 * would be left stopped (process state T).
	 */
	catch_sigstop = (tcp->flags & TCB_IGNORE_ONE_SIGSTOP);
	error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, 0);
	if (error == 0) {
		/* On a clear day, you can see forever. */
	}
	else if (errno != ESRCH) {
		/* Shouldn't happen. */
		perror("detach: ptrace(PTRACE_DETACH, ...)");
	}
	/* ESRCH from here on: signal 0 only probes whether the task exists. */
	else if (my_tkill(tcp->pid, 0) < 0) {
		if (errno != ESRCH)
			perror("detach: checking sanity");
	}
	/* The task exists; send a SIGSTOP unless one is already expected. */
	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
		if (errno != ESRCH)
			perror("detach: stopping child");
	}
	else
		catch_sigstop = 1;
	if (catch_sigstop) {
		/*
		 * Wait for the process until it reports a SIGSTOP stop (then
		 * detach), is no longer stopped, or waiting/restarting fails.
		 */
		for (;;) {
#ifdef __WALL
			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
				if (errno == ECHILD) /* Already gone.  */
					break;
				if (errno != EINVAL) {
					perror("detach: waiting");
					break;
				}
#endif /* __WALL */
				/* No __WALL here.  */
				if (waitpid(tcp->pid, &status, 0) < 0) {
					if (errno != ECHILD) {
						perror("detach: waiting");
						break;
					}
#ifdef __WCLONE
					/* If no processes, try clones.  */
					if (wait4(tcp->pid, &status, __WCLONE,
						  NULL) < 0) {
						if (errno != ECHILD)
							perror("detach: waiting");
						break;
					}
#endif /* __WCLONE */
				}
#ifdef __WALL
			}
#endif
			if (!WIFSTOPPED(status)) {
				/* Au revoir, mon ami. */
				break;
			}
			if (WSTOPSIG(status) == SIGSTOP) {
				/* This is the stop we were waiting for: detach now. */
				ptrace_restart(PTRACE_DETACH, tcp, 0);
				break;
			}
			/*
			 * Some other stop: resume the process, passing the
			 * signal on unless it is the syscall trap signal.
			 */
			error = ptrace_restart(PTRACE_CONT, tcp,
					WSTOPSIG(status) == syscall_trap_sig ? 0
					: WSTOPSIG(status));
			if (error < 0)
				break;
		}
	}
#endif /* LINUX */

#if defined(SUNOS4)
	/* PTRACE_DETACH won't respect `sig' argument, so we post it here. */
	/* NOTE(review): this function has no `sig' parameter; the comment
	   above looks stale -- confirm. */
	error = ptrace_restart(PTRACE_DETACH, tcp, 0);
#endif /* SUNOS4 */

	if (!qflag)
		fprintf(stderr, "Process %u detached\n", tcp->pid);

	droptcb(tcp);

	return error;
}
1775
1776#ifdef USE_PROCFS
1777
/*
 * SIGCHLD handler (installed in main() when USE_PROCFS is defined):
 * collect every child that can be reaped without blocking.
 *
 * waitpid() sets errno when it fails (e.g. ECHILD once nothing is left
 * to reap), so errno is saved and restored to keep the handler from
 * corrupting the errno value of the code it interrupted.
 */
static void reaper(int sig)
{
	int saved_errno = errno;
	int status;

	(void)sig;
	while (waitpid(-1, &status, WNOHANG) > 0) {
	}
	errno = saved_errno;
}
1786
1787#endif /* USE_PROCFS */
1788
1789static void
1790cleanup(void)
1791{
1792	int i;
1793	struct tcb *tcp;
1794
1795	for (i = 0; i < tcbtabsize; i++) {
1796		tcp = tcbtab[i];
1797		if (!(tcp->flags & TCB_INUSE))
1798			continue;
1799		if (debug)
1800			fprintf(stderr,
1801				"cleanup: looking at pid %u\n", tcp->pid);
1802		if (tcp_last &&
1803		    (!outfname || followfork < 2 || tcp_last == tcp)) {
1804			tprints(" <unfinished ...>");
1805			printtrailer();
1806		}
1807		if (tcp->flags & TCB_ATTACHED)
1808			detach(tcp);
1809		else {
1810			kill(tcp->pid, SIGCONT);
1811			kill(tcp->pid, SIGTERM);
1812		}
1813	}
1814	if (cflag)
1815		call_summary(outf);
1816}
1817
1818static void
1819interrupt(int sig)
1820{
1821	interrupted = 1;
1822}
1823
1824#ifndef HAVE_STRERROR
1825
1826#if !HAVE_DECL_SYS_ERRLIST
1827extern int sys_nerr;
1828extern char *sys_errlist[];
1829#endif /* HAVE_DECL_SYS_ERRLIST */
1830
1831const char *
1832strerror(int err_no)
1833{
1834	static char buf[64];
1835
1836	if (err_no < 1 || err_no >= sys_nerr) {
1837		sprintf(buf, "Unknown error %d", err_no);
1838		return buf;
1839	}
1840	return sys_errlist[err_no];
1841}
1842
#endif /* HAVE_STRERROR */
1844
1845#ifndef HAVE_STRSIGNAL
1846
1847#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
1848extern char *sys_siglist[];
1849#endif
1850#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
1851extern char *_sys_siglist[];
1852#endif
1853
1854const char *
1855strsignal(int sig)
1856{
1857	static char buf[64];
1858
1859	if (sig < 1 || sig >= NSIG) {
1860		sprintf(buf, "Unknown signal %d", sig);
1861		return buf;
1862	}
1863#ifdef HAVE__SYS_SIGLIST
1864	return _sys_siglist[sig];
1865#else
1866	return sys_siglist[sig];
1867#endif
1868}
1869
1870#endif /* HAVE_STRSIGNAL */
1871
1872#ifdef USE_PROCFS
1873
1874static void
1875rebuild_pollv(void)
1876{
1877	int i, j;
1878
1879	free(pollv);
1880	pollv = malloc(nprocs * sizeof(pollv[0]));
1881	if (!pollv)
1882		die_out_of_memory();
1883
1884	for (i = j = 0; i < tcbtabsize; i++) {
1885		struct tcb *tcp = tcbtab[i];
1886		if (!(tcp->flags & TCB_INUSE))
1887			continue;
1888		pollv[j].fd = tcp->pfd;
1889		pollv[j].events = POLLWANT;
1890		j++;
1891	}
1892	if (j != nprocs) {
1893		error_msg_and_die("proc miscount");
1894	}
1895}
1896
1897#ifndef HAVE_POLLABLE_PROCFS
1898
1899static void
1900proc_poll_open(void)
1901{
1902	int i;
1903
1904	if (pipe(proc_poll_pipe) < 0) {
1905		perror_msg_and_die("pipe");
1906	}
1907	for (i = 0; i < 2; i++) {
1908		set_cloexec_flag(proc_poll_pipe[i]);
1909	}
1910}
1911
1912static int
1913proc_poll(struct pollfd *pollv, int nfds, int timeout)
1914{
1915	int i;
1916	int n;
1917	struct proc_pollfd pollinfo;
1918
1919	n = read(proc_poll_pipe[0], &pollinfo, sizeof(pollinfo));
1920	if (n < 0)
1921		return n;
1922	if (n != sizeof(struct proc_pollfd)) {
1923		error_msg_and_die("panic: short read: %d", n);
1924	}
1925	for (i = 0; i < nprocs; i++) {
1926		if (pollv[i].fd == pollinfo.fd)
1927			pollv[i].revents = pollinfo.revents;
1928		else
1929			pollv[i].revents = 0;
1930	}
1931	poller_pid = pollinfo.pid;
1932	return 1;
1933}
1934
/*
 * Deliberately empty signal handler.  proc_poller() installs it for
 * SIGUSR1 so that the signal has a handler to run instead of its
 * default action.
 */
static void
wakeup_handler(int sig)
{
	(void)sig;
}
1939
1940static void
1941proc_poller(int pfd)
1942{
1943	struct proc_pollfd pollinfo;
1944	struct sigaction sa;
1945	sigset_t blocked_set, empty_set;
1946	int i;
1947	int n;
1948	struct rlimit rl;
1949#ifdef FREEBSD
1950	struct procfs_status pfs;
1951#endif /* FREEBSD */
1952
1953	switch (fork()) {
1954	case -1:
1955		perror_msg_and_die("fork");
1956	case 0:
1957		break;
1958	default:
1959		return;
1960	}
1961
1962	sa.sa_handler = interactive ? SIG_DFL : SIG_IGN;
1963	sa.sa_flags = 0;
1964	sigemptyset(&sa.sa_mask);
1965	sigaction(SIGHUP, &sa, NULL);
1966	sigaction(SIGINT, &sa, NULL);
1967	sigaction(SIGQUIT, &sa, NULL);
1968	sigaction(SIGPIPE, &sa, NULL);
1969	sigaction(SIGTERM, &sa, NULL);
1970	sa.sa_handler = wakeup_handler;
1971	sigaction(SIGUSR1, &sa, NULL);
1972	sigemptyset(&blocked_set);
1973	sigaddset(&blocked_set, SIGUSR1);
1974	sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1975	sigemptyset(&empty_set);
1976
1977	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
1978		perror_msg_and_die("getrlimit(RLIMIT_NOFILE, ...)");
1979	}
1980	n = rl.rlim_cur;
1981	for (i = 0; i < n; i++) {
1982		if (i != pfd && i != proc_poll_pipe[1])
1983			close(i);
1984	}
1985
1986	pollinfo.fd = pfd;
1987	pollinfo.pid = getpid();
1988	for (;;) {
1989#ifndef FREEBSD
1990		if (ioctl(pfd, PIOCWSTOP, NULL) < 0)
1991#else
1992		if (ioctl(pfd, PIOCWSTOP, &pfs) < 0)
1993#endif
1994		{
1995			switch (errno) {
1996			case EINTR:
1997				continue;
1998			case EBADF:
1999				pollinfo.revents = POLLERR;
2000				break;
2001			case ENOENT:
2002				pollinfo.revents = POLLHUP;
2003				break;
2004			default:
2005				perror("proc_poller: PIOCWSTOP");
2006			}
2007			write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
2008			_exit(0);
2009		}
2010		pollinfo.revents = POLLWANT;
2011		write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
2012		sigsuspend(&empty_set);
2013	}
2014}
2015
2016#endif /* !HAVE_POLLABLE_PROCFS */
2017
2018static int
2019choose_pfd()
2020{
2021	int i, j;
2022	struct tcb *tcp;
2023
2024	static int last;
2025
2026	if (followfork < 2 &&
2027	    last < nprocs && (pollv[last].revents & POLLWANT)) {
2028		/*
2029		 * The previous process is ready to run again.  We'll
2030		 * let it do so if it is currently in a syscall.  This
2031		 * heuristic improves the readability of the trace.
2032		 */
2033		tcp = pfd2tcb(pollv[last].fd);
2034		if (tcp && exiting(tcp))
2035			return pollv[last].fd;
2036	}
2037
2038	for (i = 0; i < nprocs; i++) {
2039		/* Let competing children run round robin. */
2040		j = (i + last + 1) % nprocs;
2041		if (pollv[j].revents & (POLLHUP | POLLERR)) {
2042			tcp = pfd2tcb(pollv[j].fd);
2043			if (!tcp) {
2044				error_msg_and_die("lost proc");
2045			}
2046			droptcb(tcp);
2047			return -1;
2048		}
2049		if (pollv[j].revents & POLLWANT) {
2050			last = j;
2051			return pollv[j].fd;
2052		}
2053	}
2054	error_msg_and_die("nothing ready");
2055}
2056
2057static int
2058trace(void)
2059{
2060#ifdef POLL_HACK
2061	struct tcb *in_syscall = NULL;
2062#endif
2063	struct tcb *tcp;
2064	int pfd;
2065	int what;
2066	int ioctl_result = 0, ioctl_errno = 0;
2067	long arg;
2068
2069	for (;;) {
2070		if (interactive)
2071			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2072
2073		if (nprocs == 0)
2074			break;
2075
2076		switch (nprocs) {
2077		case 1:
2078#ifndef HAVE_POLLABLE_PROCFS
2079			if (proc_poll_pipe[0] == -1) {
2080#endif
2081				tcp = first_used_tcb();
2082				if (!tcp)
2083					continue;
2084				pfd = tcp->pfd;
2085				if (pfd == -1)
2086					continue;
2087				break;
2088#ifndef HAVE_POLLABLE_PROCFS
2089			}
2090			/* fall through ... */
2091#endif /* !HAVE_POLLABLE_PROCFS */
2092		default:
2093#ifdef HAVE_POLLABLE_PROCFS
2094#ifdef POLL_HACK
2095		        /* On some systems (e.g. UnixWare) we get too much ugly
2096			   "unfinished..." stuff when multiple proceses are in
2097			   syscalls.  Here's a nasty hack */
2098
2099			if (in_syscall) {
2100				struct pollfd pv;
2101				tcp = in_syscall;
2102				in_syscall = NULL;
2103				pv.fd = tcp->pfd;
2104				pv.events = POLLWANT;
2105				what = poll(&pv, 1, 1);
2106				if (what < 0) {
2107					if (interrupted)
2108						return 0;
2109					continue;
2110				}
2111				else if (what == 1 && pv.revents & POLLWANT) {
2112					goto FOUND;
2113				}
2114			}
2115#endif
2116
2117			if (poll(pollv, nprocs, INFTIM) < 0) {
2118				if (interrupted)
2119					return 0;
2120				continue;
2121			}
2122#else /* !HAVE_POLLABLE_PROCFS */
2123			if (proc_poll(pollv, nprocs, INFTIM) < 0) {
2124				if (interrupted)
2125					return 0;
2126				continue;
2127			}
2128#endif /* !HAVE_POLLABLE_PROCFS */
2129			pfd = choose_pfd();
2130			if (pfd == -1)
2131				continue;
2132			break;
2133		}
2134
2135		/* Look up `pfd' in our table. */
2136		tcp = pfd2tcb(pfd);
2137		if (tcp == NULL) {
2138			error_msg_and_die("unknown pfd: %u", pfd);
2139		}
2140#ifdef POLL_HACK
2141	FOUND:
2142#endif
2143		/* Get the status of the process. */
2144		if (!interrupted) {
2145#ifndef FREEBSD
2146			ioctl_result = IOCTL_WSTOP(tcp);
2147#else /* FREEBSD */
2148			/* Thanks to some scheduling mystery, the first poller
2149			   sometimes waits for the already processed end of fork
2150			   event. Doing a non blocking poll here solves the problem. */
2151			if (proc_poll_pipe[0] != -1)
2152				ioctl_result = IOCTL_STATUS(tcp);
2153			else
2154				ioctl_result = IOCTL_WSTOP(tcp);
2155#endif /* FREEBSD */
2156			ioctl_errno = errno;
2157#ifndef HAVE_POLLABLE_PROCFS
2158			if (proc_poll_pipe[0] != -1) {
2159				if (ioctl_result < 0)
2160					kill(poller_pid, SIGKILL);
2161				else
2162					kill(poller_pid, SIGUSR1);
2163			}
2164#endif /* !HAVE_POLLABLE_PROCFS */
2165		}
2166		if (interrupted)
2167			return 0;
2168
2169		if (interactive)
2170			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2171
2172		if (ioctl_result < 0) {
2173			/* Find out what happened if it failed. */
2174			switch (ioctl_errno) {
2175			case EINTR:
2176			case EBADF:
2177				continue;
2178#ifdef FREEBSD
2179			case ENOTTY:
2180#endif
2181			case ENOENT:
2182				droptcb(tcp);
2183				continue;
2184			default:
2185				perror_msg_and_die("PIOCWSTOP");
2186			}
2187		}
2188
2189#ifdef FREEBSD
2190		if ((tcp->flags & TCB_STARTUP) && (tcp->status.PR_WHY == PR_SYSEXIT)) {
2191			/* discard first event for a syscall we never entered */
2192			IOCTL(tcp->pfd, PIOCRUN, 0);
2193			continue;
2194		}
2195#endif
2196
2197		/* clear the just started flag */
2198		tcp->flags &= ~TCB_STARTUP;
2199
2200		/* set current output file */
2201		outf = tcp->outf;
2202		curcol = tcp->curcol;
2203
2204		if (cflag) {
2205			struct timeval stime;
2206#ifdef FREEBSD
2207			char buf[1024];
2208			int len;
2209
2210			len = pread(tcp->pfd_status, buf, sizeof(buf) - 1, 0);
2211			if (len > 0) {
2212				buf[len] = '\0';
2213				sscanf(buf,
2214				       "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %*d,%*d %ld,%ld",
2215				       &stime.tv_sec, &stime.tv_usec);
2216			} else
2217				stime.tv_sec = stime.tv_usec = 0;
2218#else /* !FREEBSD */
2219			stime.tv_sec = tcp->status.pr_stime.tv_sec;
2220			stime.tv_usec = tcp->status.pr_stime.tv_nsec/1000;
2221#endif /* !FREEBSD */
2222			tv_sub(&tcp->dtime, &stime, &tcp->stime);
2223			tcp->stime = stime;
2224		}
2225		what = tcp->status.PR_WHAT;
2226		switch (tcp->status.PR_WHY) {
2227#ifndef FREEBSD
2228		case PR_REQUESTED:
2229			if (tcp->status.PR_FLAGS & PR_ASLEEP) {
2230				tcp->status.PR_WHY = PR_SYSENTRY;
2231				if (trace_syscall(tcp) < 0) {
2232					error_msg_and_die("syscall trouble");
2233				}
2234			}
2235			break;
2236#endif /* !FREEBSD */
2237		case PR_SYSENTRY:
2238#ifdef POLL_HACK
2239		        in_syscall = tcp;
2240#endif
2241		case PR_SYSEXIT:
2242			if (trace_syscall(tcp) < 0) {
2243				error_msg_and_die("syscall trouble");
2244			}
2245			break;
2246		case PR_SIGNALLED:
2247			if (cflag != CFLAG_ONLY_STATS
2248			    && (qual_flags[what] & QUAL_SIGNAL)) {
2249				printleader(tcp);
2250				tprintf("--- %s (%s) ---",
2251					signame(what), strsignal(what));
2252				printtrailer();
2253#ifdef PR_INFO
2254				if (tcp->status.PR_INFO.si_signo == what) {
2255					printleader(tcp);
2256					tprints("    siginfo=");
2257					printsiginfo(&tcp->status.PR_INFO, 1);
2258					printtrailer();
2259				}
2260#endif
2261			}
2262			break;
2263		case PR_FAULTED:
2264			if (cflag != CFLAGS_ONLY_STATS
2265			    && (qual_flags[what] & QUAL_FAULT)) {
2266				printleader(tcp);
2267				tprintf("=== FAULT %d ===", what);
2268				printtrailer();
2269			}
2270			break;
2271#ifdef FREEBSD
2272		case 0: /* handle case we polled for nothing */
2273			continue;
2274#endif
2275		default:
2276			error_msg_and_die("odd stop %d", tcp->status.PR_WHY);
2277			break;
2278		}
2279		/* Remember current print column before continuing. */
2280		tcp->curcol = curcol;
2281		arg = 0;
2282#ifndef FREEBSD
2283		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
2284#else
2285		if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
2286#endif
2287		{
2288			perror_msg_and_die("PIOCRUN");
2289		}
2290	}
2291	return 0;
2292}
2293
2294#else /* !USE_PROCFS */
2295
2296static int
2297trace()
2298{
2299	int pid;
2300	int wait_errno;
2301	int status, sig;
2302	struct tcb *tcp;
2303#ifdef LINUX
2304	struct rusage ru;
2305	struct rusage *rup = cflag ? &ru : NULL;
2306# ifdef __WALL
2307	static int wait4_options = __WALL;
2308# endif
2309#endif /* LINUX */
2310
2311	while (nprocs != 0) {
2312		if (interrupted)
2313			return 0;
2314		if (interactive)
2315			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2316#ifdef LINUX
2317# ifdef __WALL
2318		pid = wait4(-1, &status, wait4_options, rup);
2319		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
2320			/* this kernel does not support __WALL */
2321			wait4_options &= ~__WALL;
2322			pid = wait4(-1, &status, wait4_options, rup);
2323		}
2324		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
2325			/* most likely a "cloned" process */
2326			pid = wait4(-1, &status, __WCLONE, rup);
2327			if (pid < 0) {
2328				perror_msg("wait4(__WCLONE) failed");
2329			}
2330		}
2331# else
2332		pid = wait4(-1, &status, 0, rup);
2333# endif /* __WALL */
2334#endif /* LINUX */
2335#ifdef SUNOS4
2336		pid = wait(&status);
2337#endif
2338		wait_errno = errno;
2339		if (interactive)
2340			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2341
2342		if (pid < 0) {
2343			switch (wait_errno) {
2344			case EINTR:
2345				continue;
2346			case ECHILD:
2347				/*
2348				 * We would like to verify this case
2349				 * but sometimes a race in Solbourne's
2350				 * version of SunOS sometimes reports
2351				 * ECHILD before sending us SIGCHILD.
2352				 */
2353				return 0;
2354			default:
2355				errno = wait_errno;
2356				perror("strace: wait");
2357				return -1;
2358			}
2359		}
2360		if (pid == popen_pid) {
2361			if (WIFEXITED(status) || WIFSIGNALED(status))
2362				popen_pid = 0;
2363			continue;
2364		}
2365		if (debug) {
2366			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
2367#ifdef LINUX
2368			unsigned ev = (unsigned)status >> 16;
2369			if (ev) {
2370				static const char *const event_names[] = {
2371					[PTRACE_EVENT_CLONE] = "CLONE",
2372					[PTRACE_EVENT_FORK]  = "FORK",
2373					[PTRACE_EVENT_VFORK] = "VFORK",
2374					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
2375					[PTRACE_EVENT_EXEC]  = "EXEC",
2376					[PTRACE_EVENT_EXIT]  = "EXIT",
2377				};
2378				const char *e;
2379				if (ev < ARRAY_SIZE(event_names))
2380					e = event_names[ev];
2381				else {
2382					sprintf(buf, "?? (%u)", ev);
2383					e = buf;
2384				}
2385				fprintf(stderr, " PTRACE_EVENT_%s", e);
2386			}
2387#endif
2388			strcpy(buf, "???");
2389			if (WIFSIGNALED(status))
2390#ifdef WCOREDUMP
2391				sprintf(buf, "WIFSIGNALED,%ssig=%s",
2392						WCOREDUMP(status) ? "core," : "",
2393						signame(WTERMSIG(status)));
2394#else
2395				sprintf(buf, "WIFSIGNALED,sig=%s",
2396						signame(WTERMSIG(status)));
2397#endif
2398			if (WIFEXITED(status))
2399				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
2400			if (WIFSTOPPED(status))
2401				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
2402#ifdef WIFCONTINUED
2403			if (WIFCONTINUED(status))
2404				strcpy(buf, "WIFCONTINUED");
2405#endif
2406			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
2407		}
2408
2409		/* Look up `pid' in our table. */
2410		tcp = pid2tcb(pid);
2411		if (tcp == NULL) {
2412#ifdef LINUX
2413			if (followfork) {
2414				/* This is needed to go with the CLONE_PTRACE
2415				   changes in process.c/util.c: we might see
2416				   the child's initial trap before we see the
2417				   parent return from the clone syscall.
2418				   Leave the child suspended until the parent
2419				   returns from its system call.  Only then
2420				   will we have the association of parent and
2421				   child so that we know how to do clearbpt
2422				   in the child.  */
2423				tcp = alloctcb(pid);
2424				tcp->flags |= TCB_ATTACHED | TCB_STARTUP | TCB_IGNORE_ONE_SIGSTOP;
2425				if (!qflag)
2426					fprintf(stderr, "Process %d attached\n",
2427						pid);
2428			}
2429			else
2430				/* This can happen if a clone call used
2431				   CLONE_PTRACE itself.  */
2432#endif
2433			{
2434				if (WIFSTOPPED(status))
2435					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
2436				error_msg_and_die("Unknown pid: %u", pid);
2437			}
2438		}
2439		/* set current output file */
2440		outf = tcp->outf;
2441		curcol = tcp->curcol;
2442#ifdef LINUX
2443		if (cflag) {
2444			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2445			tcp->stime = ru.ru_stime;
2446		}
2447#endif
2448
2449		if (WIFSIGNALED(status)) {
2450			if (pid == strace_child)
2451				exit_code = 0x100 | WTERMSIG(status);
2452			if (cflag != CFLAG_ONLY_STATS
2453			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2454				printleader(tcp);
2455#ifdef WCOREDUMP
2456				tprintf("+++ killed by %s %s+++",
2457					signame(WTERMSIG(status)),
2458					WCOREDUMP(status) ? "(core dumped) " : "");
2459#else
2460				tprintf("+++ killed by %s +++",
2461					signame(WTERMSIG(status)));
2462#endif
2463				printtrailer();
2464			}
2465			fflush(tcp->outf);
2466			droptcb(tcp);
2467			continue;
2468		}
2469		if (WIFEXITED(status)) {
2470			if (pid == strace_child)
2471				exit_code = WEXITSTATUS(status);
2472			if (tcp == tcp_last) {
2473				if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
2474					tprintf(" <unfinished ... exit status %d>\n",
2475						WEXITSTATUS(status));
2476				tcp_last = NULL;
2477			}
2478			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
2479				printleader(tcp);
2480				tprintf("+++ exited with %d +++", WEXITSTATUS(status));
2481				printtrailer();
2482			}
2483			fflush(tcp->outf);
2484			droptcb(tcp);
2485			continue;
2486		}
2487		if (!WIFSTOPPED(status)) {
2488			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2489			droptcb(tcp);
2490			continue;
2491		}
2492
2493		/* Is this the very first time we see this tracee stopped? */
2494		if (tcp->flags & TCB_STARTUP) {
2495			if (debug)
2496				fprintf(stderr, "pid %d has TCB_STARTUP, initializing it\n", tcp->pid);
2497			tcp->flags &= ~TCB_STARTUP;
2498			if (tcp->flags & TCB_BPTSET) {
2499				/*
2500				 * One example is a breakpoint inherited from
2501				 * parent through fork().
2502				 */
2503				if (clearbpt(tcp) < 0) {
2504					/* Pretty fatal */
2505					droptcb(tcp);
2506					cleanup();
2507					return -1;
2508				}
2509			}
2510#ifdef LINUX
2511			if (ptrace_setoptions) {
2512				if (debug)
2513					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2514				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2515					if (errno != ESRCH) {
2516						/* Should never happen, really */
2517						perror_msg_and_die("PTRACE_SETOPTIONS");
2518					}
2519				}
2520			}
2521#endif
2522		}
2523
2524		if (((unsigned)status >> 16) != 0) {
2525			/* Ptrace event (we ignore all of them for now) */
2526			goto restart_tracee_with_sig_0;
2527		}
2528
2529		sig = WSTOPSIG(status);
2530
2531		/* Is this post-attach SIGSTOP?
2532		 * Interestingly, the process may stop
2533		 * with STOPSIG equal to some other signal
2534		 * than SIGSTOP if we happend to attach
2535		 * just before the process takes a signal.
2536		 */
2537		if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
2538			if (debug)
2539				fprintf(stderr, "ignored SIGSTOP on pid %d\n", tcp->pid);
2540			tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
2541			goto restart_tracee_with_sig_0;
2542		}
2543
2544		if (sig != syscall_trap_sig) {
2545			if (cflag != CFLAG_ONLY_STATS
2546			    && (qual_flags[sig] & QUAL_SIGNAL)) {
2547				siginfo_t si;
2548#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2549				long pc = 0;
2550				long psr = 0;
2551
2552				upeek(tcp, PT_CR_IPSR, &psr);
2553				upeek(tcp, PT_CR_IIP, &pc);
2554
2555# define PSR_RI	41
2556				pc += (psr >> PSR_RI) & 0x3;
2557# define PC_FORMAT_STR	" @ %lx"
2558# define PC_FORMAT_ARG	, pc
2559#else
2560# define PC_FORMAT_STR	""
2561# define PC_FORMAT_ARG	/* nothing */
2562#endif
2563				printleader(tcp);
2564				if (ptrace(PTRACE_GETSIGINFO, pid, 0, (long) &si) == 0) {
2565					tprints("--- ");
2566					printsiginfo(&si, verbose(tcp));
2567					tprintf(" (%s)" PC_FORMAT_STR " ---",
2568						strsignal(sig)
2569						PC_FORMAT_ARG);
2570				} else
2571					tprintf("--- %s by %s" PC_FORMAT_STR " ---",
2572						strsignal(sig),
2573						signame(sig)
2574						PC_FORMAT_ARG);
2575				printtrailer();
2576				fflush(tcp->outf);
2577			}
2578			goto restart_tracee;
2579		}
2580
2581		/* We handled quick cases, we are permitted to interrupt now. */
2582		if (interrupted)
2583			return 0;
2584
2585		/* This should be syscall entry or exit.
2586		 * (Or it still can be that pesky post-execve SIGTRAP!)
2587		 * Handle it.
2588		 */
2589		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2590			/* ptrace() failed in trace_syscall() with ESRCH.
2591			 * Likely a result of process disappearing mid-flight.
2592			 * Observed case: exit_group() terminating
2593			 * all processes in thread group.
2594			 */
2595			if (tcp->flags & TCB_ATTACHED) {
2596				if (tcp_last) {
2597					/* Do we have dangling line "syscall(param, param"?
2598					 * Finish the line then.
2599					 */
2600					tcp_last->flags |= TCB_REPRINT;
2601					tprints(" <unfinished ...>");
2602					printtrailer();
2603					fflush(tcp->outf);
2604				}
2605				/* We assume that ptrace error was caused by process death.
2606				 * We used to detach(tcp) here, but since we no longer
2607				 * implement "detach before death" policy/hack,
2608				 * we can let this process to report its death to us
2609				 * normally, via WIFEXITED or WIFSIGNALED wait status.
2610				 */
2611			} else {
2612				/* It's our real child (and we also trace it) */
2613				/* my_tkill(pid, SIGKILL); - why? */
2614				/* droptcb(tcp); - why? */
2615			}
2616			continue;
2617		}
2618 restart_tracee_with_sig_0:
2619		sig = 0;
2620 restart_tracee:
2621		/* Remember current print column before continuing. */
2622		tcp->curcol = curcol;
2623		if (ptrace_restart(PTRACE_SYSCALL, tcp, sig) < 0) {
2624			cleanup();
2625			return -1;
2626		}
2627	}
2628	return 0;
2629}
2630
2631#endif /* !USE_PROCFS */
2632
2633void
2634tprintf(const char *fmt, ...)
2635{
2636	va_list args;
2637
2638	va_start(args, fmt);
2639	if (outf) {
2640		int n = vfprintf(outf, fmt, args);
2641		if (n < 0) {
2642			if (outf != stderr)
2643				perror(outfname == NULL
2644				       ? "<writing to pipe>" : outfname);
2645		} else
2646			curcol += n;
2647	}
2648	va_end(args);
2649}
2650
2651void
2652tprints(const char *str)
2653{
2654	if (outf) {
2655		int n = fputs(str, outf);
2656		if (n >= 0) {
2657			curcol += strlen(str);
2658			return;
2659		}
2660		if (outf != stderr)
2661			perror(outfname == NULL
2662			       ? "<writing to pipe>" : outfname);
2663	}
2664}
2665
2666void
2667printleader(struct tcb *tcp)
2668{
2669	if (tcp_last) {
2670		if (tcp_last->ptrace_errno) {
2671			if (tcp_last->flags & TCB_INSYSCALL) {
2672				tprints(" <unavailable>) ");
2673				tabto();
2674			}
2675			tprints("= ? <unavailable>\n");
2676			tcp_last->ptrace_errno = 0;
2677		} else if (!outfname || followfork < 2 || tcp_last == tcp) {
2678			tcp_last->flags |= TCB_REPRINT;
2679			tprints(" <unfinished ...>\n");
2680		}
2681	}
2682	curcol = 0;
2683	if ((followfork == 1 || pflag_seen > 1) && outfname)
2684		tprintf("%-5d ", tcp->pid);
2685	else if (nprocs > 1 && !outfname)
2686		tprintf("[pid %5u] ", tcp->pid);
2687	if (tflag) {
2688		char str[sizeof("HH:MM:SS")];
2689		struct timeval tv, dtv;
2690		static struct timeval otv;
2691
2692		gettimeofday(&tv, NULL);
2693		if (rflag) {
2694			if (otv.tv_sec == 0)
2695				otv = tv;
2696			tv_sub(&dtv, &tv, &otv);
2697			tprintf("%6ld.%06ld ",
2698				(long) dtv.tv_sec, (long) dtv.tv_usec);
2699			otv = tv;
2700		}
2701		else if (tflag > 2) {
2702			tprintf("%ld.%06ld ",
2703				(long) tv.tv_sec, (long) tv.tv_usec);
2704		}
2705		else {
2706			time_t local = tv.tv_sec;
2707			strftime(str, sizeof(str), "%T", localtime(&local));
2708			if (tflag > 1)
2709				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
2710			else
2711				tprintf("%s ", str);
2712		}
2713	}
2714	if (iflag)
2715		printcall(tcp);
2716}
2717
2718void
2719tabto(void)
2720{
2721	if (curcol < acolumn)
2722		tprints(acolumn_spaces + curcol);
2723}
2724
2725void
2726printtrailer(void)
2727{
2728	tprints("\n");
2729	tcp_last = NULL;
2730}
2731
2732#ifdef HAVE_MP_PROCFS
2733
/*
 * Write a command word, optionally followed by an argument buffer,
 * to `fd' with a single writev() call.
 *
 * fd   - descriptor to write to
 * cmd  - command value; its sizeof(int) bytes are always written first
 * arg  - optional payload; when NULL only `cmd' is written
 * size - number of bytes of `arg' to write (unused when arg is NULL)
 *
 * Returns whatever writev() returns (converted to int).
 */
int
mp_ioctl(int fd, int cmd, void *arg, int size)
{
	struct iovec vec[2];
	int count;

	/* First element: the command word itself. */
	vec[0].iov_base = &cmd;
	vec[0].iov_len = sizeof(cmd);
	count = 1;

	/* Second element is present only when a payload was supplied. */
	if (arg != NULL) {
		vec[1].iov_base = arg;
		vec[1].iov_len = size;
		count = 2;
	}

	return writev(fd, vec, count);
}
2750
2751#endif
2752