strace.c revision 04f8b4860f12512186481ca21dbd311d9d612326
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <limits.h>
48#include <dirent.h>
49
50#ifdef LINUX
51# include <asm/unistd.h>
52# if defined __NR_tkill
53#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
54# else
   /* kill() may arbitrarily choose the target task of the process group,
      while we later wait on that specific TID.  PID process waits become
      TID task-specific waits for a process under ptrace(2).  */
58#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
59#  define my_tkill(tid, sig) kill((tid), (sig))
60# endif
61#endif
62
63#if defined(IA64) && defined(LINUX)
64# include <asm/ptrace_offsets.h>
65#endif
66
67#ifdef USE_PROCFS
68#include <poll.h>
69#endif
70
71#ifdef SVR4
72#include <sys/stropts.h>
73#ifdef HAVE_MP_PROCFS
74#ifdef HAVE_SYS_UIO_H
75#include <sys/uio.h>
76#endif
77#endif
78#endif
79extern char **environ;
80extern int optind;
81extern char *optarg;
82
83
84int debug = 0, followfork = 0;
85unsigned int ptrace_setoptions = 0;
86/* Which WSTOPSIG(status) value marks syscall traps? */
87static unsigned int syscall_trap_sig = SIGTRAP;
88int dtime = 0, xflag = 0, qflag = 0;
89cflag_t cflag = CFLAG_NONE;
90static int iflag = 0, interactive = 0, pflag_seen = 0, rflag = 0, tflag = 0;
91/*
92 * daemonized_tracer supports -D option.
93 * With this option, strace forks twice.
94 * Unlike normal case, with -D *grandparent* process exec's,
95 * becoming a traced process. Child exits (this prevents traced process
96 * from having children it doesn't expect to have), and grandchild
97 * attaches to grandparent similarly to strace -p PID.
98 * This allows for more transparent interaction in cases
99 * when process and its parent are communicating via signals,
100 * wait() etc. Without -D, strace process gets lodged in between,
101 * disrupting parent<->child link.
102 */
103static bool daemonized_tracer = 0;
104
105/* Sometimes we want to print only succeeding syscalls. */
106int not_failing_only = 0;
107
108/* Show path associated with fd arguments */
109int show_fd_path = 0;
110
111/* are we filtering traces based on paths? */
112int tracing_paths = 0;
113
114static int exit_code = 0;
115static int strace_child = 0;
116static int strace_tracer_pid = 0;
117
118static char *username = NULL;
119static uid_t run_uid;
120static gid_t run_gid;
121
122int acolumn = DEFAULT_ACOLUMN;
123int max_strlen = DEFAULT_STRLEN;
124static char *outfname = NULL;
125static FILE *outf;
126static int curcol;
127static struct tcb **tcbtab;
128static unsigned int nprocs, tcbtabsize;
129static const char *progname;
130
131static int detach(struct tcb *tcp, int sig);
132static int trace(void);
133static void cleanup(void);
134static void interrupt(int sig);
135static sigset_t empty_set, blocked_set;
136
137#ifdef HAVE_SIG_ATOMIC_T
138static volatile sig_atomic_t interrupted;
139#else /* !HAVE_SIG_ATOMIC_T */
140static volatile int interrupted;
141#endif /* !HAVE_SIG_ATOMIC_T */
142
143#ifdef USE_PROCFS
144
145static struct tcb *pfd2tcb(int pfd);
146static void reaper(int sig);
147static void rebuild_pollv(void);
148static struct pollfd *pollv;
149
150#ifndef HAVE_POLLABLE_PROCFS
151
152static void proc_poll_open(void);
153static void proc_poller(int pfd);
154
155struct proc_pollfd {
156	int fd;
157	int revents;
158	int pid;
159};
160
161static int poller_pid;
162static int proc_poll_pipe[2] = { -1, -1 };
163
164#endif /* !HAVE_POLLABLE_PROCFS */
165
166#ifdef HAVE_MP_PROCFS
167#define POLLWANT	POLLWRNORM
168#else
169#define POLLWANT	POLLPRI
170#endif
171#endif /* USE_PROCFS */
172
173static void
174usage(FILE *ofp, int exitval)
175{
176	fprintf(ofp, "\
177usage: strace [-CdDffhiqrtttTvVxxy] [-a column] [-e expr] ... [-o file]\n\
178              [-p pid] ... [-s strsize] [-u username] [-E var=val] ...\n\
179              [-P path] [command [arg ...]]\n\
180   or: strace -c [-D] [-e expr] ... [-O overhead] [-S sortby] [-E var=val] ...\n\
181              [command [arg ...]]\n\
182-c -- count time, calls, and errors for each syscall and report summary\n\
183-C -- like -c but also print regular output while processes are running\n\
184-f -- follow forks, -ff -- with output into separate files\n\
185-F -- attempt to follow vforks, -h -- print help message\n\
186-i -- print instruction pointer at time of syscall\n\
187-q -- suppress messages about attaching, detaching, etc.\n\
188-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
189-T -- print time spent in each syscall, -V -- print version\n\
190-v -- verbose mode: print unabbreviated argv, stat, termio[s], etc. args\n\
191-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
192-y -- print paths associated with file descriptor arguments\n\
193-a column -- alignment COLUMN for printing syscall results (default %d)\n\
194-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
195   options: trace, abbrev, verbose, raw, signal, read, or write\n\
196-o file -- send trace output to FILE instead of stderr\n\
197-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
198-p pid -- trace process with process id PID, may be repeated\n\
199-D -- run tracer process as a detached grandchild, not as parent\n\
200-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
201-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
202-u username -- run command as username handling setuid and/or setgid\n\
203-E var=val -- put var=val in the environment for command\n\
204-E var -- remove var from the environment for command\n\
205-P path -- trace accesses to path\n\
206" /* this is broken, so don't document it
207-z -- print only succeeding syscalls\n\
208  */
209, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
210	exit(exitval);
211}
212
213static void die(void) __attribute__ ((noreturn));
214static void die(void)
215{
216	if (strace_tracer_pid == getpid()) {
217		cflag = 0;
218		cleanup();
219	}
220	exit(1);
221}
222
223static void verror_msg(int err_no, const char *fmt, va_list p)
224{
225	fflush(NULL);
226	fprintf(stderr, "%s: ", progname);
227	vfprintf(stderr, fmt, p);
228	if (err_no)
229		fprintf(stderr, ": %s\n", strerror(err_no));
230	else
231		putc('\n', stderr);
232	fflush(stderr);
233}
234
/*
 * Report a printf-style formatted message on stderr, prefixed with the
 * program name.  No errno text is appended.  Returns to the caller.
 */
void error_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	va_end(args);
}
242
/*
 * Report a printf-style formatted message on stderr (no errno text)
 * and terminate via die().  Does not return, which is why there is no
 * va_end() after the call.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	die();
}
250
/*
 * Report a printf-style formatted message on stderr followed by the
 * description of the current errno value.  Returns to the caller.
 */
void perror_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	va_end(args);
}
258
/*
 * Report a printf-style formatted message on stderr followed by the
 * description of the current errno value, then terminate via die().
 * Does not return, which is why there is no va_end() after the call.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	die();
}
266
267#ifdef SVR4
268#ifdef MIPS
/*
 * Intentionally empty.  On SVR4/MIPS procfs builds startup_child()
 * installs this as the SIGINT handler of the child before it pauses;
 * the handler only has to exist, it does no work.
 */
void
foobar()
{
}
273#endif /* MIPS */
274#endif /* SVR4 */
275
276/* Glue for systems without a MMU that cannot provide fork() */
277#ifdef HAVE_FORK
278# define strace_vforked 0
279#else
280# define strace_vforked 1
281# define fork()         vfork()
282#endif
283
/*
 * Make sure FD_CLOEXEC is set on descriptor FD.
 *
 * A failing F_GETFD can only mean that FD is not a valid descriptor,
 * i.e. a bug in the caller, so it is treated as fatal rather than
 * reported back.  Nothing is written when the flag is already set.
 */
static void
set_cloexec_flag(int fd)
{
	int current = fcntl(fd, F_GETFD);

	if (current < 0)
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);

	/* Already close-on-exec: skip the redundant F_SETFD. */
	if ((current & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, current | FD_CLOEXEC); /* never fails */
}
305
/*
 * Exchange the real and effective user ids of the process.
 *
 * Needed when strace is installed setuid: filesystem and process
 * management operations are bracketed by a pair of swap_uid() calls.
 * Does nothing when both ids are equal, and is compiled out on SVR4.
 * A failing setreuid() is fatal.
 */
static void
swap_uid(void)
{
#ifndef SVR4
	int effective = geteuid();
	int real = getuid();

	if (effective == real)
		return;

	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
#endif
}
321
322#if _LFS64_LARGEFILE
323# define fopen_for_output fopen64
324#else
325# define fopen_for_output fopen
326#endif
327
/*
 * Open PATH for writing as a trace output file.
 *
 * The open itself is bracketed by swap_uid() calls, and the resulting
 * descriptor is marked close-on-exec.  Failure to open is fatal, so
 * the returned stream is never NULL.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
341
342static int popen_pid = 0;
343
344#ifndef _PATH_BSHELL
345# define _PATH_BSHELL "/bin/sh"
346#endif
347
348/*
349 * We cannot use standard popen(3) here because we have to distinguish
350 * popen child process from other processes we trace, and standard popen(3)
351 * does not export its child's pid.
352 */
353static FILE *
354strace_popen(const char *command)
355{
356	FILE *fp;
357	int fds[2];
358
359	swap_uid();
360	if (pipe(fds) < 0)
361		perror_msg_and_die("pipe");
362
363	set_cloexec_flag(fds[1]); /* never fails */
364
365	popen_pid = vfork();
366	if (popen_pid == -1)
367		perror_msg_and_die("vfork");
368
369	if (popen_pid == 0) {
370		/* child */
371		close(fds[1]);
372		if (fds[0] != 0) {
373			if (dup2(fds[0], 0))
374				perror_msg_and_die("dup2");
375			close(fds[0]);
376		}
377		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
378		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
379	}
380
381	/* parent */
382	close(fds[0]);
383	swap_uid();
384	fp = fdopen(fds[1], "w");
385	if (!fp)
386		error_msg_and_die("Out of memory");
387	return fp;
388}
389
390static void
391newoutf(struct tcb *tcp)
392{
393	if (outfname && followfork > 1) {
394		char name[520 + sizeof(int) * 3];
395		sprintf(name, "%.512s.%u", outfname, tcp->pid);
396		tcp->outf = strace_fopen(name);
397	}
398}
399
400static void
401startup_attach(void)
402{
403	int tcbi;
404	struct tcb *tcp;
405
406	/*
407	 * Block user interruptions as we would leave the traced
408	 * process stopped (process state T) if we would terminate in
409	 * between PTRACE_ATTACH and wait4 () on SIGSTOP.
410	 * We rely on cleanup() from this point on.
411	 */
412	if (interactive)
413		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
414
415	if (daemonized_tracer) {
416		pid_t pid = fork();
417		if (pid < 0) {
418			_exit(1);
419		}
420		if (pid) { /* parent */
421			/*
422			 * Wait for grandchild to attach to straced process
423			 * (grandparent). Grandchild SIGKILLs us after it attached.
424			 * Grandparent's wait() is unblocked by our death,
425			 * it proceeds to exec the straced program.
426			 */
427			pause();
428			_exit(0); /* paranoia */
429		}
430		/* grandchild */
431		/* We will be the tracer process. Remember our new pid: */
432		strace_tracer_pid = getpid();
433	}
434
435	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
436		tcp = tcbtab[tcbi];
437
438		if (!(tcp->flags & TCB_INUSE) || !(tcp->flags & TCB_ATTACHED))
439			continue;
440#ifdef LINUX
441		if (tcp->flags & TCB_ATTACH_DONE)
442			continue;
443#endif
444		/* Reinitialize the output since it may have changed. */
445		tcp->outf = outf;
446		newoutf(tcp);
447
448#ifdef USE_PROCFS
449		if (proc_open(tcp, 1) < 0) {
450			fprintf(stderr, "trouble opening proc file\n");
451			droptcb(tcp);
452			continue;
453		}
454#else /* !USE_PROCFS */
455# ifdef LINUX
456		if (followfork && !daemonized_tracer) {
457			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
458			DIR *dir;
459
460			sprintf(procdir, "/proc/%d/task", tcp->pid);
461			dir = opendir(procdir);
462			if (dir != NULL) {
463				unsigned int ntid = 0, nerr = 0;
464				struct dirent *de;
465				int tid;
466				while ((de = readdir(dir)) != NULL) {
467					if (de->d_fileno == 0)
468						continue;
469					tid = atoi(de->d_name);
470					if (tid <= 0)
471						continue;
472					++ntid;
473					if (ptrace(PTRACE_ATTACH, tid, (char *) 1, 0) < 0) {
474						++nerr;
475						if (debug)
476							fprintf(stderr, "attach to pid %d failed\n", tid);
477					}
478					else {
479						if (debug)
480							fprintf(stderr, "attach to pid %d succeeded\n", tid);
481						if (tid != tcp->pid) {
482							struct tcb *new_tcp = alloctcb(tid);
483							new_tcp->flags |= TCB_ATTACHED|TCB_ATTACH_DONE;
484						}
485					}
486					if (interactive) {
487						sigprocmask(SIG_SETMASK, &empty_set, NULL);
488						if (interrupted)
489							goto ret;
490						sigprocmask(SIG_BLOCK, &blocked_set, NULL);
491					}
492				}
493				closedir(dir);
494				ntid -= nerr;
495				if (ntid == 0) {
496					perror("attach: ptrace(PTRACE_ATTACH, ...)");
497					droptcb(tcp);
498					continue;
499				}
500				if (!qflag) {
501					fprintf(stderr, ntid > 1
502? "Process %u attached with %u threads - interrupt to quit\n"
503: "Process %u attached - interrupt to quit\n",
504						tcp->pid, ntid);
505				}
506				continue;
507			} /* if (opendir worked) */
508		} /* if (-f) */
509# endif /* LINUX */
510		if (ptrace(PTRACE_ATTACH, tcp->pid, (char *) 1, 0) < 0) {
511			perror("attach: ptrace(PTRACE_ATTACH, ...)");
512			droptcb(tcp);
513			continue;
514		}
515		if (debug)
516			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);
517
518		if (daemonized_tracer) {
519			/*
520			 * It is our grandparent we trace, not a -p PID.
521			 * Don't want to just detach on exit, so...
522			 */
523			tcp->flags &= ~TCB_ATTACHED;
524			/*
525			 * Make parent go away.
526			 * Also makes grandparent's wait() unblock.
527			 */
528			kill(getppid(), SIGKILL);
529		}
530
531#endif /* !USE_PROCFS */
532		if (!qflag)
533			fprintf(stderr,
534				"Process %u attached - interrupt to quit\n",
535				tcp->pid);
536	} /* for each tcbtab[] */
537
538 ret:
539#ifdef LINUX
540	/* TCB_ATTACH_DONE flag is used only in this function */
541	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
542		tcp = tcbtab[tcbi];
543		tcp->flags &= ~TCB_ATTACH_DONE;
544	}
545#endif
546
547	if (interactive)
548		sigprocmask(SIG_SETMASK, &empty_set, NULL);
549}
550
551static void
552startup_child(char **argv)
553{
554	struct stat statbuf;
555	const char *filename;
556	char pathname[MAXPATHLEN];
557	int pid = 0;
558	struct tcb *tcp;
559
560	filename = argv[0];
561	if (strchr(filename, '/')) {
562		if (strlen(filename) > sizeof pathname - 1) {
563			errno = ENAMETOOLONG;
564			perror_msg_and_die("exec");
565		}
566		strcpy(pathname, filename);
567	}
568#ifdef USE_DEBUGGING_EXEC
569	/*
570	 * Debuggers customarily check the current directory
571	 * first regardless of the path but doing that gives
572	 * security geeks a panic attack.
573	 */
574	else if (stat(filename, &statbuf) == 0)
575		strcpy(pathname, filename);
576#endif /* USE_DEBUGGING_EXEC */
577	else {
578		const char *path;
579		int m, n, len;
580
581		for (path = getenv("PATH"); path && *path; path += m) {
582			if (strchr(path, ':')) {
583				n = strchr(path, ':') - path;
584				m = n + 1;
585			}
586			else
587				m = n = strlen(path);
588			if (n == 0) {
589				if (!getcwd(pathname, MAXPATHLEN))
590					continue;
591				len = strlen(pathname);
592			}
593			else if (n > sizeof pathname - 1)
594				continue;
595			else {
596				strncpy(pathname, path, n);
597				len = n;
598			}
599			if (len && pathname[len - 1] != '/')
600				pathname[len++] = '/';
601			strcpy(pathname + len, filename);
602			if (stat(pathname, &statbuf) == 0 &&
603			    /* Accept only regular files
604			       with some execute bits set.
605			       XXX not perfect, might still fail */
606			    S_ISREG(statbuf.st_mode) &&
607			    (statbuf.st_mode & 0111))
608				break;
609		}
610	}
611	if (stat(pathname, &statbuf) < 0) {
612		perror_msg_and_die("Can't stat '%s'", filename);
613	}
614	strace_child = pid = fork();
615	if (pid < 0) {
616		perror_msg_and_die("fork");
617	}
618	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
619	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
620	) {
621		pid = getpid();
622#ifdef USE_PROCFS
623		if (outf != stderr) close(fileno(outf));
624#ifdef MIPS
625		/* Kludge for SGI, see proc_open for details. */
626		sa.sa_handler = foobar;
627		sa.sa_flags = 0;
628		sigemptyset(&sa.sa_mask);
629		sigaction(SIGINT, &sa, NULL);
630#endif /* MIPS */
631#ifndef FREEBSD
632		pause();
633#else /* FREEBSD */
634		kill(pid, SIGSTOP); /* stop HERE */
635#endif /* FREEBSD */
636#else /* !USE_PROCFS */
637		if (outf != stderr)
638			close(fileno(outf));
639
640		if (!daemonized_tracer) {
641			if (ptrace(PTRACE_TRACEME, 0, (char *) 1, 0) < 0) {
642				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
643			}
644			if (debug)
645				kill(pid, SIGSTOP);
646		}
647
648		if (username != NULL || geteuid() == 0) {
649			uid_t run_euid = run_uid;
650			gid_t run_egid = run_gid;
651
652			if (statbuf.st_mode & S_ISUID)
653				run_euid = statbuf.st_uid;
654			if (statbuf.st_mode & S_ISGID)
655				run_egid = statbuf.st_gid;
656
657			/*
658			 * It is important to set groups before we
659			 * lose privileges on setuid.
660			 */
661			if (username != NULL) {
662				if (initgroups(username, run_gid) < 0) {
663					perror_msg_and_die("initgroups");
664				}
665				if (setregid(run_gid, run_egid) < 0) {
666					perror_msg_and_die("setregid");
667				}
668				if (setreuid(run_uid, run_euid) < 0) {
669					perror_msg_and_die("setreuid");
670				}
671			}
672		}
673		else
674			setreuid(run_uid, run_uid);
675
676		if (!daemonized_tracer) {
677			/*
678			 * Induce an immediate stop so that the parent
679			 * will resume us with PTRACE_SYSCALL and display
680			 * this execve call normally.
681			 * Unless of course we're on a no-MMU system where
682			 * we vfork()-ed, so we cannot stop the child.
683			 */
684			if (!strace_vforked)
685				kill(getpid(), SIGSTOP);
686		} else {
687			struct sigaction sv_sigchld;
688			sigaction(SIGCHLD, NULL, &sv_sigchld);
689			/*
690			 * Make sure it is not SIG_IGN, otherwise wait
691			 * will not block.
692			 */
693			signal(SIGCHLD, SIG_DFL);
694			/*
695			 * Wait for grandchild to attach to us.
696			 * It kills child after that, and wait() unblocks.
697			 */
698			alarm(3);
699			wait(NULL);
700			alarm(0);
701			sigaction(SIGCHLD, &sv_sigchld, NULL);
702		}
703#endif /* !USE_PROCFS */
704
705		execv(pathname, argv);
706		perror_msg_and_die("exec");
707	}
708
709	/* We are the tracer.  */
710	/* With -D, we are *child* here, IOW: different pid. Fetch it. */
711	strace_tracer_pid = getpid();
712
713	tcp = alloctcb(daemonized_tracer ? getppid() : pid);
714	if (daemonized_tracer) {
715		/* We want subsequent startup_attach() to attach to it.  */
716		tcp->flags |= TCB_ATTACHED;
717	}
718#ifdef USE_PROCFS
719	if (proc_open(tcp, 0) < 0) {
720		perror_msg_and_die("trouble opening proc file");
721	}
722#endif /* USE_PROCFS */
723}
724
725#ifdef LINUX
/*
 * Send signal SIG to PID without disturbing errno.
 *
 * The result of kill() is deliberately ignored; errno is restored to
 * the value it had on entry so that a following perror-style report
 * still describes the original failure.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);

	errno = errno_on_entry;
}
733
734/*
735 * Test whether the kernel support PTRACE_O_TRACECLONE et al options.
736 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
737 * and then see which options are supported by the kernel.
738 */
739static void
740test_ptrace_setoptions_followfork(void)
741{
742	int pid, expected_grandchild = 0, found_grandchild = 0;
743	const unsigned int test_options = PTRACE_O_TRACECLONE |
744					  PTRACE_O_TRACEFORK |
745					  PTRACE_O_TRACEVFORK;
746
747	if ((pid = fork()) < 0)
748		perror_msg_and_die("fork");
749	else if (pid == 0) {
750		pid = getpid();
751		if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
752			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
753					   __func__);
754		kill(pid, SIGSTOP);
755		if (fork() < 0)
756			perror_msg_and_die("fork");
757		_exit(0);
758	}
759
760	while (1) {
761		int status, tracee_pid;
762
763		errno = 0;
764		tracee_pid = wait(&status);
765		if (tracee_pid <= 0) {
766			if (errno == EINTR)
767				continue;
768			else if (errno == ECHILD)
769				break;
770			kill_save_errno(pid, SIGKILL);
771			perror_msg_and_die("%s: unexpected wait result %d",
772					   __func__, tracee_pid);
773		}
774		if (WIFEXITED(status)) {
775			if (WEXITSTATUS(status)) {
776				if (tracee_pid != pid)
777					kill_save_errno(pid, SIGKILL);
778				error_msg_and_die("%s: unexpected exit status %u",
779						  __func__, WEXITSTATUS(status));
780			}
781			continue;
782		}
783		if (WIFSIGNALED(status)) {
784			if (tracee_pid != pid)
785				kill_save_errno(pid, SIGKILL);
786			error_msg_and_die("%s: unexpected signal %u",
787					  __func__, WTERMSIG(status));
788		}
789		if (!WIFSTOPPED(status)) {
790			if (tracee_pid != pid)
791				kill_save_errno(tracee_pid, SIGKILL);
792			kill(pid, SIGKILL);
793			error_msg_and_die("%s: unexpected wait status %x",
794					  __func__, status);
795		}
796		if (tracee_pid != pid) {
797			found_grandchild = tracee_pid;
798			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
799				kill_save_errno(tracee_pid, SIGKILL);
800				kill_save_errno(pid, SIGKILL);
801				perror_msg_and_die("PTRACE_CONT doesn't work");
802			}
803			continue;
804		}
805		switch (WSTOPSIG(status)) {
806		case SIGSTOP:
807			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
808			    && errno != EINVAL && errno != EIO)
809				perror_msg("PTRACE_SETOPTIONS");
810			break;
811		case SIGTRAP:
812			if (status >> 16 == PTRACE_EVENT_FORK) {
813				long msg = 0;
814
815				if (ptrace(PTRACE_GETEVENTMSG, pid,
816					   NULL, (long) &msg) == 0)
817					expected_grandchild = msg;
818			}
819			break;
820		}
821		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
822			kill_save_errno(pid, SIGKILL);
823			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
824		}
825	}
826	if (expected_grandchild && expected_grandchild == found_grandchild) {
827		ptrace_setoptions |= test_options;
828		if (debug)
829			fprintf(stderr, "ptrace_setoptions = %#x\n",
830				ptrace_setoptions);
831		return;
832	}
833	error_msg("Test for PTRACE_O_TRACECLONE failed, "
834		  "giving up using this feature.");
835}
836
837/*
838 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
839 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
840 * and then see whether it will stop with (SIGTRAP | 0x80).
841 *
842 * Use of this option enables correct handling of user-generated SIGTRAPs,
843 * and SIGTRAPs generated by special instructions such as int3 on x86:
844 * _start:	.globl	_start
845 *		int3
846 *		movl	$42, %ebx
847 *		movl	$1, %eax
848 *		int	$0x80
849 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
850 */
851static void
852test_ptrace_setoptions_for_all(void)
853{
854	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
855					  PTRACE_O_TRACEEXEC;
856	int pid;
857	int it_worked = 0;
858
859	pid = fork();
860	if (pid < 0)
861		perror_msg_and_die("fork");
862
863	if (pid == 0) {
864		pid = getpid();
865		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
866			/* Note: exits with exitcode 1 */
867			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
868					   __func__);
869		kill(pid, SIGSTOP);
870		_exit(0); /* parent should see entry into this syscall */
871	}
872
873	while (1) {
874		int status, tracee_pid;
875
876		errno = 0;
877		tracee_pid = wait(&status);
878		if (tracee_pid <= 0) {
879			if (errno == EINTR)
880				continue;
881			kill_save_errno(pid, SIGKILL);
882			perror_msg_and_die("%s: unexpected wait result %d",
883					   __func__, tracee_pid);
884		}
885		if (WIFEXITED(status)) {
886			if (WEXITSTATUS(status) == 0)
887				break;
888			error_msg_and_die("%s: unexpected exit status %u",
889					  __func__, WEXITSTATUS(status));
890		}
891		if (WIFSIGNALED(status)) {
892			error_msg_and_die("%s: unexpected signal %u",
893					  __func__, WTERMSIG(status));
894		}
895		if (!WIFSTOPPED(status)) {
896			kill(pid, SIGKILL);
897			error_msg_and_die("%s: unexpected wait status %x",
898					  __func__, status);
899		}
900		if (WSTOPSIG(status) == SIGSTOP) {
901			/*
902			 * We don't check "options aren't accepted" error.
903			 * If it happens, we'll never get (SIGTRAP | 0x80),
904			 * and thus will decide to not use the option.
905			 * IOW: the outcome of the test will be correct.
906			 */
907			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
908			    && errno != EINVAL && errno != EIO)
909				perror_msg("PTRACE_SETOPTIONS");
910		}
911		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
912			it_worked = 1;
913		}
914		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
915			kill_save_errno(pid, SIGKILL);
916			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
917		}
918	}
919
920	if (it_worked) {
921		syscall_trap_sig = (SIGTRAP | 0x80);
922		ptrace_setoptions |= test_options;
923		if (debug)
924			fprintf(stderr, "ptrace_setoptions = %#x\n",
925				ptrace_setoptions);
926		return;
927	}
928
929	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
930		  "giving up using this feature.");
931}
932#endif
933
934int
935main(int argc, char *argv[])
936{
937	struct tcb *tcp;
938	int c, pid = 0;
939	int optF = 0;
940	struct sigaction sa;
941
942	progname = argv[0] ? argv[0] : "strace";
943
944	strace_tracer_pid = getpid();
945
946	/* Allocate the initial tcbtab.  */
947	tcbtabsize = argc;	/* Surely enough for all -p args.  */
948	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
949	if (tcbtab == NULL)
950		error_msg_and_die("Out of memory");
951	tcp = calloc(tcbtabsize, sizeof(*tcp));
952	if (tcp == NULL)
953		error_msg_and_die("Out of memory");
954	for (c = 0; c < tcbtabsize; c++)
955		tcbtab[c] = tcp++;
956
957	outf = stderr;
958	interactive = 1;
959	set_sortby(DEFAULT_SORTBY);
960	set_personality(DEFAULT_PERSONALITY);
961	qualify("trace=all");
962	qualify("abbrev=all");
963	qualify("verbose=all");
964	qualify("signal=all");
965	while ((c = getopt(argc, argv,
966		"+cCdfFhiqrtTvVxyz"
967#ifndef USE_PROCFS
968		"D"
969#endif
970		"a:e:o:O:p:s:S:u:E:P:")) != EOF) {
971		switch (c) {
972		case 'c':
973			if (cflag == CFLAG_BOTH) {
974				error_msg_and_die("-c and -C are mutually exclusive options");
975			}
976			cflag = CFLAG_ONLY_STATS;
977			break;
978		case 'C':
979			if (cflag == CFLAG_ONLY_STATS) {
980				error_msg_and_die("-c and -C are mutually exclusive options");
981			}
982			cflag = CFLAG_BOTH;
983			break;
984		case 'd':
985			debug++;
986			break;
987#ifndef USE_PROCFS
988		case 'D':
989			daemonized_tracer = 1;
990			break;
991#endif
992		case 'F':
993			optF = 1;
994			break;
995		case 'f':
996			followfork++;
997			break;
998		case 'h':
999			usage(stdout, 0);
1000			break;
1001		case 'i':
1002			iflag++;
1003			break;
1004		case 'q':
1005			qflag++;
1006			break;
1007		case 'r':
1008			rflag++;
1009			tflag++;
1010			break;
1011		case 't':
1012			tflag++;
1013			break;
1014		case 'T':
1015			dtime++;
1016			break;
1017		case 'x':
1018			xflag++;
1019			break;
1020		case 'y':
1021			show_fd_path = 1;
1022			break;
1023		case 'v':
1024			qualify("abbrev=none");
1025			break;
1026		case 'V':
1027			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1028			exit(0);
1029			break;
1030		case 'z':
1031			not_failing_only = 1;
1032			break;
1033		case 'a':
1034			acolumn = atoi(optarg);
1035			break;
1036		case 'e':
1037			qualify(optarg);
1038			break;
1039		case 'o':
1040			outfname = strdup(optarg);
1041			break;
1042		case 'O':
1043			set_overhead(atoi(optarg));
1044			break;
1045		case 'p':
1046			if ((pid = atoi(optarg)) <= 0) {
1047				error_msg("Invalid process id: '%s'", optarg);
1048				break;
1049			}
1050			if (pid == strace_tracer_pid) {
1051				error_msg("I'm sorry, I can't let you do that, Dave.");
1052				break;
1053			}
1054			tcp = alloc_tcb(pid, 0);
1055			tcp->flags |= TCB_ATTACHED;
1056			pflag_seen++;
1057			break;
1058		case 'P':
1059			tracing_paths = 1;
1060			if (pathtrace_select(optarg)) {
1061				error_msg_and_die("Failed to select path '%s'", optarg);
1062			}
1063			break;
1064		case 's':
1065			max_strlen = atoi(optarg);
1066			if (max_strlen < 0) {
1067				error_msg_and_die("Invalid -s argument: '%s'", optarg);
1068			}
1069			break;
1070		case 'S':
1071			set_sortby(optarg);
1072			break;
1073		case 'u':
1074			username = strdup(optarg);
1075			break;
1076		case 'E':
1077			if (putenv(optarg) < 0) {
1078				error_msg_and_die("Out of memory");
1079			}
1080			break;
1081		default:
1082			usage(stderr, 1);
1083			break;
1084		}
1085	}
1086
1087	if ((optind == argc) == !pflag_seen)
1088		usage(stderr, 1);
1089
1090	if (pflag_seen && daemonized_tracer) {
1091		error_msg_and_die("-D and -p are mutually exclusive options");
1092	}
1093
1094	if (!followfork)
1095		followfork = optF;
1096
1097	if (followfork > 1 && cflag) {
1098		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1099	}
1100
1101	/* See if they want to run as another user. */
1102	if (username != NULL) {
1103		struct passwd *pent;
1104
1105		if (getuid() != 0 || geteuid() != 0) {
1106			error_msg_and_die("You must be root to use the -u option");
1107		}
1108		if ((pent = getpwnam(username)) == NULL) {
1109			error_msg_and_die("Cannot find user '%s'", username);
1110		}
1111		run_uid = pent->pw_uid;
1112		run_gid = pent->pw_gid;
1113	}
1114	else {
1115		run_uid = getuid();
1116		run_gid = getgid();
1117	}
1118
1119#ifdef LINUX
1120	if (followfork)
1121		test_ptrace_setoptions_followfork();
1122	test_ptrace_setoptions_for_all();
1123#endif
1124
1125	/* Check if they want to redirect the output. */
1126	if (outfname) {
1127		/* See if they want to pipe the output. */
1128		if (outfname[0] == '|' || outfname[0] == '!') {
1129			/*
1130			 * We can't do the <outfname>.PID funny business
1131			 * when using popen, so prohibit it.
1132			 */
1133			if (followfork > 1)
1134				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1135			outf = strace_popen(outfname + 1);
1136		}
1137		else if (followfork <= 1)
1138			outf = strace_fopen(outfname);
1139	}
1140
1141	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1142		static char buf[BUFSIZ];
1143		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1144	}
1145	if (outfname && optind < argc) {
1146		interactive = 0;
1147		qflag = 1;
1148	}
1149
1150	/* Valid states here:
1151	   optind < argc	pflag_seen	outfname	interactive
1152	   1			0		0		1
1153	   0			1		0		1
1154	   1			0		1		0
1155	   0			1		1		1
1156	 */
1157
1158	/* STARTUP_CHILD must be called before the signal handlers get
1159	   installed below as they are inherited into the spawned process.
1160	   Also we do not need to be protected by them as during interruption
1161	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1162	if (!pflag_seen)
1163		startup_child(&argv[optind]);
1164
1165	sigemptyset(&empty_set);
1166	sigemptyset(&blocked_set);
1167	sa.sa_handler = SIG_IGN;
1168	sigemptyset(&sa.sa_mask);
1169	sa.sa_flags = 0;
1170	sigaction(SIGTTOU, &sa, NULL);
1171	sigaction(SIGTTIN, &sa, NULL);
1172	if (interactive) {
1173		sigaddset(&blocked_set, SIGHUP);
1174		sigaddset(&blocked_set, SIGINT);
1175		sigaddset(&blocked_set, SIGQUIT);
1176		sigaddset(&blocked_set, SIGPIPE);
1177		sigaddset(&blocked_set, SIGTERM);
1178		sa.sa_handler = interrupt;
1179#ifdef SUNOS4
1180		/* POSIX signals on sunos4.1 are a little broken. */
1181		sa.sa_flags = SA_INTERRUPT;
1182#endif /* SUNOS4 */
1183	}
1184	sigaction(SIGHUP, &sa, NULL);
1185	sigaction(SIGINT, &sa, NULL);
1186	sigaction(SIGQUIT, &sa, NULL);
1187	sigaction(SIGPIPE, &sa, NULL);
1188	sigaction(SIGTERM, &sa, NULL);
1189#ifdef USE_PROCFS
1190	sa.sa_handler = reaper;
1191	sigaction(SIGCHLD, &sa, NULL);
1192#else
1193	/* Make sure SIGCHLD has the default action so that waitpid
1194	   definitely works without losing track of children.  The user
1195	   should not have given us a bogus state to inherit, but he might
1196	   have.  Arguably we should detect SIG_IGN here and pass it on
1197	   to children, but probably noone really needs that.  */
1198	sa.sa_handler = SIG_DFL;
1199	sigaction(SIGCHLD, &sa, NULL);
1200#endif /* USE_PROCFS */
1201
1202	if (pflag_seen || daemonized_tracer)
1203		startup_attach();
1204
1205	if (trace() < 0)
1206		exit(1);
1207	cleanup();
1208	fflush(NULL);
1209	if (exit_code > 0xff) {
1210		/* Child was killed by a signal, mimic that.  */
1211		exit_code &= 0xff;
1212		signal(exit_code, SIG_DFL);
1213		raise(exit_code);
1214		/* Paranoia - what if this signal is not fatal?
1215		   Exit with 128 + signo then.  */
1216		exit_code += 128;
1217	}
1218	exit(exit_code);
1219}
1220
1221static void
1222expand_tcbtab(void)
1223{
1224	/* Allocate some more TCBs and expand the table.
1225	   We don't want to relocate the TCBs because our
1226	   callers have pointers and it would be a pain.
1227	   So tcbtab is a table of pointers.  Since we never
1228	   free the TCBs, we allocate a single chunk of many.  */
1229	int i = tcbtabsize;
1230	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1231	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1232	if (newtab == NULL || newtcbs == NULL)
1233		error_msg_and_die("expand_tcbtab: out of memory");
1234	tcbtabsize *= 2;
1235	tcbtab = newtab;
1236	while (i < tcbtabsize)
1237		tcbtab[i++] = newtcbs++;
1238}
1239
1240struct tcb *
1241alloc_tcb(int pid, int command_options_parsed)
1242{
1243	int i;
1244	struct tcb *tcp;
1245
1246	if (nprocs == tcbtabsize)
1247		expand_tcbtab();
1248
1249	for (i = 0; i < tcbtabsize; i++) {
1250		tcp = tcbtab[i];
1251		if ((tcp->flags & TCB_INUSE) == 0) {
1252			memset(tcp, 0, sizeof(*tcp));
1253			tcp->pid = pid;
1254			tcp->flags = TCB_INUSE | TCB_STARTUP;
1255			tcp->outf = outf; /* Initialise to current out file */
1256			tcp->pfd = -1;
1257			nprocs++;
1258			if (debug)
1259				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1260			if (command_options_parsed)
1261				newoutf(tcp);
1262			return tcp;
1263		}
1264	}
1265	error_msg_and_die("bug in alloc_tcb");
1266}
1267
1268#ifdef USE_PROCFS
1269int
1270proc_open(struct tcb *tcp, int attaching)
1271{
1272	char proc[32];
1273	long arg;
1274#ifdef SVR4
1275	int i;
1276	sysset_t syscalls;
1277	sigset_t signals;
1278	fltset_t faults;
1279#endif
1280#ifndef HAVE_POLLABLE_PROCFS
1281	static int last_pfd;
1282#endif
1283
1284#ifdef HAVE_MP_PROCFS
1285	/* Open the process pseudo-files in /proc. */
1286	sprintf(proc, "/proc/%d/ctl", tcp->pid);
1287	if ((tcp->pfd = open(proc, O_WRONLY|O_EXCL)) < 0) {
1288		perror("strace: open(\"/proc/...\", ...)");
1289		return -1;
1290	}
1291	set_cloexec_flag(tcp->pfd);
1292	sprintf(proc, "/proc/%d/status", tcp->pid);
1293	if ((tcp->pfd_stat = open(proc, O_RDONLY|O_EXCL)) < 0) {
1294		perror("strace: open(\"/proc/...\", ...)");
1295		return -1;
1296	}
1297	set_cloexec_flag(tcp->pfd_stat);
1298	sprintf(proc, "/proc/%d/as", tcp->pid);
1299	if ((tcp->pfd_as = open(proc, O_RDONLY|O_EXCL)) < 0) {
1300		perror("strace: open(\"/proc/...\", ...)");
1301		return -1;
1302	}
1303	set_cloexec_flag(tcp->pfd_as);
1304#else
1305	/* Open the process pseudo-file in /proc. */
1306#ifndef FREEBSD
1307	sprintf(proc, "/proc/%d", tcp->pid);
1308	tcp->pfd = open(proc, O_RDWR|O_EXCL);
1309#else /* FREEBSD */
1310	sprintf(proc, "/proc/%d/mem", tcp->pid);
1311	tcp->pfd = open(proc, O_RDWR);
1312#endif /* FREEBSD */
1313	if (tcp->pfd < 0) {
1314		perror("strace: open(\"/proc/...\", ...)");
1315		return -1;
1316	}
1317	set_cloexec_flag(tcp->pfd);
1318#endif
1319#ifdef FREEBSD
1320	sprintf(proc, "/proc/%d/regs", tcp->pid);
1321	if ((tcp->pfd_reg = open(proc, O_RDONLY)) < 0) {
1322		perror("strace: open(\"/proc/.../regs\", ...)");
1323		return -1;
1324	}
1325	if (cflag) {
1326		sprintf(proc, "/proc/%d/status", tcp->pid);
1327		if ((tcp->pfd_status = open(proc, O_RDONLY)) < 0) {
1328			perror("strace: open(\"/proc/.../status\", ...)");
1329			return -1;
1330		}
1331	} else
1332		tcp->pfd_status = -1;
1333#endif /* FREEBSD */
1334	rebuild_pollv();
1335	if (!attaching) {
1336		/*
1337		 * Wait for the child to pause.  Because of a race
1338		 * condition we have to poll for the event.
1339		 */
1340		for (;;) {
1341			if (IOCTL_STATUS(tcp) < 0) {
1342				perror("strace: PIOCSTATUS");
1343				return -1;
1344			}
1345			if (tcp->status.PR_FLAGS & PR_ASLEEP)
1346				break;
1347		}
1348	}
1349#ifndef FREEBSD
1350	/* Stop the process so that we own the stop. */
1351	if (IOCTL(tcp->pfd, PIOCSTOP, (char *)NULL) < 0) {
1352		perror("strace: PIOCSTOP");
1353		return -1;
1354	}
1355#endif
1356#ifdef PIOCSET
1357	/* Set Run-on-Last-Close. */
1358	arg = PR_RLC;
1359	if (IOCTL(tcp->pfd, PIOCSET, &arg) < 0) {
1360		perror("PIOCSET PR_RLC");
1361		return -1;
1362	}
1363	/* Set or Reset Inherit-on-Fork. */
1364	arg = PR_FORK;
1365	if (IOCTL(tcp->pfd, followfork ? PIOCSET : PIOCRESET, &arg) < 0) {
1366		perror("PIOC{SET,RESET} PR_FORK");
1367		return -1;
1368	}
1369#else  /* !PIOCSET */
1370#ifndef FREEBSD
1371	if (ioctl(tcp->pfd, PIOCSRLC) < 0) {
1372		perror("PIOCSRLC");
1373		return -1;
1374	}
1375	if (ioctl(tcp->pfd, followfork ? PIOCSFORK : PIOCRFORK) < 0) {
1376		perror("PIOC{S,R}FORK");
1377		return -1;
1378	}
1379#else /* FREEBSD */
1380	/* just unset the PF_LINGER flag for the Run-on-Last-Close. */
1381	if (ioctl(tcp->pfd, PIOCGFL, &arg) < 0) {
1382	        perror("PIOCGFL");
1383		return -1;
1384	}
1385	arg &= ~PF_LINGER;
1386	if (ioctl(tcp->pfd, PIOCSFL, arg) < 0) {
1387		perror("PIOCSFL");
1388		return -1;
1389	}
1390#endif /* FREEBSD */
1391#endif /* !PIOCSET */
1392#ifndef FREEBSD
1393	/* Enable all syscall entries we care about. */
1394	premptyset(&syscalls);
1395	for (i = 1; i < MAX_QUALS; ++i) {
1396		if (i > (sizeof syscalls) * CHAR_BIT) break;
1397		if (qual_flags[i] & QUAL_TRACE) praddset(&syscalls, i);
1398	}
1399	praddset(&syscalls, SYS_execve);
1400	if (followfork) {
1401		praddset(&syscalls, SYS_fork);
1402#ifdef SYS_forkall
1403		praddset(&syscalls, SYS_forkall);
1404#endif
1405#ifdef SYS_fork1
1406		praddset(&syscalls, SYS_fork1);
1407#endif
1408#ifdef SYS_rfork1
1409		praddset(&syscalls, SYS_rfork1);
1410#endif
1411#ifdef SYS_rforkall
1412		praddset(&syscalls, SYS_rforkall);
1413#endif
1414	}
1415	if (IOCTL(tcp->pfd, PIOCSENTRY, &syscalls) < 0) {
1416		perror("PIOCSENTRY");
1417		return -1;
1418	}
1419	/* Enable the syscall exits. */
1420	if (IOCTL(tcp->pfd, PIOCSEXIT, &syscalls) < 0) {
1421		perror("PIOSEXIT");
1422		return -1;
1423	}
1424	/* Enable signals we care about. */
1425	premptyset(&signals);
1426	for (i = 1; i < MAX_QUALS; ++i) {
1427		if (i > (sizeof signals) * CHAR_BIT) break;
1428		if (qual_flags[i] & QUAL_SIGNAL) praddset(&signals, i);
1429	}
1430	if (IOCTL(tcp->pfd, PIOCSTRACE, &signals) < 0) {
1431		perror("PIOCSTRACE");
1432		return -1;
1433	}
1434	/* Enable faults we care about */
1435	premptyset(&faults);
1436	for (i = 1; i < MAX_QUALS; ++i) {
1437		if (i > (sizeof faults) * CHAR_BIT) break;
1438		if (qual_flags[i] & QUAL_FAULT) praddset(&faults, i);
1439	}
1440	if (IOCTL(tcp->pfd, PIOCSFAULT, &faults) < 0) {
1441		perror("PIOCSFAULT");
1442		return -1;
1443	}
1444#else /* FREEBSD */
1445	/* set events flags. */
1446	arg = S_SIG | S_SCE | S_SCX;
1447	if (ioctl(tcp->pfd, PIOCBIS, arg) < 0) {
1448		perror("PIOCBIS");
1449		return -1;
1450	}
1451#endif /* FREEBSD */
1452	if (!attaching) {
1453#ifdef MIPS
1454		/*
1455		 * The SGI PRSABORT doesn't work for pause() so
1456		 * we send it a caught signal to wake it up.
1457		 */
1458		kill(tcp->pid, SIGINT);
1459#else /* !MIPS */
1460#ifdef PRSABORT
1461		/* The child is in a pause(), abort it. */
1462		arg = PRSABORT;
1463		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1464			perror("PIOCRUN");
1465			return -1;
1466		}
1467#endif
1468#endif /* !MIPS*/
1469#ifdef FREEBSD
1470		/* wake up the child if it received the SIGSTOP */
1471		kill(tcp->pid, SIGCONT);
1472#endif
1473		for (;;) {
1474			/* Wait for the child to do something. */
1475			if (IOCTL_WSTOP(tcp) < 0) {
1476				perror("PIOCWSTOP");
1477				return -1;
1478			}
1479			if (tcp->status.PR_WHY == PR_SYSENTRY) {
1480				tcp->flags &= ~TCB_INSYSCALL;
1481				get_scno(tcp);
1482				if (known_scno(tcp) == SYS_execve)
1483					break;
1484			}
1485			/* Set it running: maybe execve will be next. */
1486#ifndef FREEBSD
1487			arg = 0;
1488			if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1489#else /* FREEBSD */
1490			if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0) {
1491#endif /* FREEBSD */
1492				perror("PIOCRUN");
1493				return -1;
1494			}
1495#ifdef FREEBSD
1496			/* handle the case where we "opened" the child before
1497			   it did the kill -STOP */
1498			if (tcp->status.PR_WHY == PR_SIGNALLED &&
1499			    tcp->status.PR_WHAT == SIGSTOP)
1500			        kill(tcp->pid, SIGCONT);
1501#endif
1502		}
1503#ifndef FREEBSD
1504	}
1505#else /* FREEBSD */
1506	} else {
1507		if (attaching < 2) {
1508			/* We are attaching to an already running process.
1509			 * Try to figure out the state of the process in syscalls,
1510			 * to handle the first event well.
1511			 * This is done by having a look at the "wchan" property of the
1512			 * process, which tells where it is stopped (if it is). */
1513			FILE * status;
1514			char wchan[20]; /* should be enough */
1515
1516			sprintf(proc, "/proc/%d/status", tcp->pid);
1517			status = fopen(proc, "r");
1518			if (status &&
1519			    (fscanf(status, "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d"
1520				    "%*d,%*d %*d,%*d %19s", wchan) == 1) &&
1521			    strcmp(wchan, "nochan") && strcmp(wchan, "spread") &&
1522			    strcmp(wchan, "stopevent")) {
1523				/* The process is asleep in the middle of a syscall.
1524				   Fake the syscall entry event */
1525				tcp->flags &= ~(TCB_INSYSCALL|TCB_STARTUP);
1526				tcp->status.PR_WHY = PR_SYSENTRY;
1527				trace_syscall(tcp);
1528			}
1529			if (status)
1530				fclose(status);
1531		} /* otherwise it's a fork being followed */
1532	}
1533#endif /* FREEBSD */
1534#ifndef HAVE_POLLABLE_PROCFS
1535	if (proc_poll_pipe[0] != -1)
1536		proc_poller(tcp->pfd);
1537	else if (nprocs > 1) {
1538		proc_poll_open();
1539		proc_poller(last_pfd);
1540		proc_poller(tcp->pfd);
1541	}
1542	last_pfd = tcp->pfd;
1543#endif /* !HAVE_POLLABLE_PROCFS */
1544	return 0;
1545}
1546
1547#endif /* USE_PROCFS */
1548
1549struct tcb *
1550pid2tcb(int pid)
1551{
1552	int i;
1553
1554	if (pid <= 0)
1555		return NULL;
1556
1557	for (i = 0; i < tcbtabsize; i++) {
1558		struct tcb *tcp = tcbtab[i];
1559		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1560			return tcp;
1561	}
1562
1563	return NULL;
1564}
1565
1566#ifdef USE_PROCFS
1567
1568static struct tcb *
1569first_used_tcb(void)
1570{
1571	int i;
1572	struct tcb *tcp;
1573	for (i = 0; i < tcbtabsize; i++) {
1574		tcp = tcbtab[i];
1575		if (tcp->flags & TCB_INUSE)
1576			return tcp;
1577	}
1578	return NULL;
1579}
1580
1581static struct tcb *
1582pfd2tcb(int pfd)
1583{
1584	int i;
1585
1586	for (i = 0; i < tcbtabsize; i++) {
1587		struct tcb *tcp = tcbtab[i];
1588		if (tcp->pfd != pfd)
1589			continue;
1590		if (tcp->flags & TCB_INUSE)
1591			return tcp;
1592	}
1593	return NULL;
1594}
1595
1596#endif /* USE_PROCFS */
1597
1598void
1599droptcb(struct tcb *tcp)
1600{
1601	if (tcp->pid == 0)
1602		return;
1603
1604	nprocs--;
1605	if (debug)
1606		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1607
1608	if (tcp->pfd != -1) {
1609		close(tcp->pfd);
1610		tcp->pfd = -1;
1611#ifdef FREEBSD
1612		if (tcp->pfd_reg != -1) {
1613		        close(tcp->pfd_reg);
1614		        tcp->pfd_reg = -1;
1615		}
1616		if (tcp->pfd_status != -1) {
1617			close(tcp->pfd_status);
1618			tcp->pfd_status = -1;
1619		}
1620#endif /* !FREEBSD */
1621#ifdef USE_PROCFS
1622		tcp->flags = 0; /* rebuild_pollv needs it */
1623		rebuild_pollv();
1624#endif
1625	}
1626
1627	if (outfname && followfork > 1 && tcp->outf)
1628		fclose(tcp->outf);
1629
1630	memset(tcp, 0, sizeof(*tcp));
1631}
1632
1633/* detach traced process; continue with sig
1634   Never call DETACH twice on the same process as both unattached and
1635   attached-unstopped processes give the same ESRCH.  For unattached process we
1636   would SIGSTOP it and wait for its SIGSTOP notification forever.  */
1637
/*
 * tcp: the traced process to let go; its TCB is released with droptcb()
 *      before returning, so the caller must not use it afterwards.
 * sig: signal number handed to the detach request (0 for none).
 * Returns the value left in `error': 0 if nothing failed, otherwise the
 * result of the last ptrace request that stored into it.
 */
1638static int
1639detach(struct tcb *tcp, int sig)
1640{
1641	int error = 0;
1642#ifdef LINUX
1643	int status, catch_sigstop;
1644#endif
1645
	/* Remove a breakpoint we may still have planted in the tracee. */
1646	if (tcp->flags & TCB_BPTSET)
1647		clearbpt(tcp);
1648
1649#ifdef LINUX
1650	/*
1651	 * Linux wrongly insists the child be stopped
1652	 * before detaching.  Arghh.  We go through hoops
1653	 * to make a clean break of things.
1654	 */
1655#if defined(SPARC)
1656#undef PTRACE_DETACH
1657#define PTRACE_DETACH PTRACE_SUNDETACH
1658#endif
1659	/*
1660	 * On TCB_STARTUP we did PTRACE_ATTACH but still did not get the
1661	 * expected SIGSTOP.  We must catch exactly one as otherwise the
1662	 * detached process would be left stopped (process state T).
1663	 */
1664	catch_sigstop = (tcp->flags & TCB_STARTUP);
	/*
	 * Try a plain detach first.  If it fails with ESRCH, probe the
	 * process with signal 0; if it is still there, send it SIGSTOP
	 * (unless one is already expected) and arrange to wait for it.
	 */
1665	if ((error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, sig)) == 0) {
1666		/* On a clear day, you can see forever. */
1667	}
1668	else if (errno != ESRCH) {
1669		/* Shouldn't happen. */
1670		perror("detach: ptrace(PTRACE_DETACH, ...)");
1671	}
1672	else if (my_tkill(tcp->pid, 0) < 0) {
1673		if (errno != ESRCH)
1674			perror("detach: checking sanity");
1675	}
1676	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
1677		if (errno != ESRCH)
1678			perror("detach: stopping child");
1679	}
1680	else
1681		catch_sigstop = 1;
1682	if (catch_sigstop) {
		/*
		 * Wait for the tracee until it reports the SIGSTOP stop.
		 * Every other stop is resumed with PTRACE_CONT, passing the
		 * stop signal on (0 in place of syscall_trap_sig).
		 */
1683		for (;;) {
1684#ifdef __WALL
1685			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
1686				if (errno == ECHILD) /* Already gone.  */
1687					break;
1688				if (errno != EINVAL) {
1689					perror("detach: waiting");
1690					break;
1691				}
1692#endif /* __WALL */
1693				/* No __WALL here.  */
1694				if (waitpid(tcp->pid, &status, 0) < 0) {
1695					if (errno != ECHILD) {
1696						perror("detach: waiting");
1697						break;
1698					}
1699#ifdef __WCLONE
1700					/* If no processes, try clones.  */
1701					if (wait4(tcp->pid, &status, __WCLONE,
1702						  NULL) < 0) {
1703						if (errno != ECHILD)
1704							perror("detach: waiting");
1705						break;
1706					}
1707#endif /* __WCLONE */
1708				}
1709#ifdef __WALL
1710			}
1711#endif
1712			if (!WIFSTOPPED(status)) {
1713				/* Au revoir, mon ami. */
1714				break;
1715			}
1716			if (WSTOPSIG(status) == SIGSTOP) {
				/* This is the stop we were waiting for: detach
				   now.  The result of this request is not
				   stored in `error'. */
1717				ptrace_restart(PTRACE_DETACH, tcp, sig);
1718				break;
1719			}
1720			error = ptrace_restart(PTRACE_CONT, tcp,
1721					WSTOPSIG(status) == syscall_trap_sig ? 0
1722					: WSTOPSIG(status));
1723			if (error < 0)
1724				break;
1725		}
1726	}
1727#endif /* LINUX */
1728
1729#if defined(SUNOS4)
1730	/* PTRACE_DETACH won't respect `sig' argument, so we post it here. */
1731	if (sig && kill(tcp->pid, sig) < 0)
1732		perror("detach: kill");
1733	sig = 0;
1734	error = ptrace_restart(PTRACE_DETACH, tcp, sig);
1735#endif /* SUNOS4 */
1736
	/* Announce the detach unless -q was given. */
1737	if (!qflag)
1738		fprintf(stderr, "Process %u detached\n", tcp->pid);
1739
1740	droptcb(tcp);
1741
1742	return error;
1743}
1744
1745#ifdef USE_PROCFS
1746
/*
 * SIGCHLD handler for the /proc-based tracer: collect every child that
 * has already changed state, without blocking.
 *
 * waitpid() sets errno (ECHILD once no children are left), and a signal
 * handler can run between a failing call in the main program and the
 * code that inspects errno, so errno is saved on entry and restored on
 * exit.
 */
static void reaper(int sig)
{
	int saved_errno = errno;
	int status;

	(void) sig;	/* only installed for SIGCHLD; the number is not needed */

	while (waitpid(-1, &status, WNOHANG) > 0)
		continue;

	errno = saved_errno;
}
1755
1756#endif /* USE_PROCFS */
1757
1758static void
1759cleanup(void)
1760{
1761	int i;
1762	struct tcb *tcp;
1763
1764	for (i = 0; i < tcbtabsize; i++) {
1765		tcp = tcbtab[i];
1766		if (!(tcp->flags & TCB_INUSE))
1767			continue;
1768		if (debug)
1769			fprintf(stderr,
1770				"cleanup: looking at pid %u\n", tcp->pid);
1771		if (tcp_last &&
1772		    (!outfname || followfork < 2 || tcp_last == tcp)) {
1773			tprintf(" <unfinished ...>");
1774			printtrailer();
1775		}
1776		if (tcp->flags & TCB_ATTACHED)
1777			detach(tcp, 0);
1778		else {
1779			kill(tcp->pid, SIGCONT);
1780			kill(tcp->pid, SIGTERM);
1781		}
1782	}
1783	if (cflag)
1784		call_summary(outf);
1785}
1786
1787static void
1788interrupt(int sig)
1789{
1790	interrupted = 1;
1791}
1792
1793#ifndef HAVE_STRERROR
1794
1795#if !HAVE_DECL_SYS_ERRLIST
1796extern int sys_nerr;
1797extern char *sys_errlist[];
1798#endif /* HAVE_DECL_SYS_ERRLIST */
1799
1800const char *
1801strerror(int err_no)
1802{
1803	static char buf[64];
1804
1805	if (err_no < 1 || err_no >= sys_nerr) {
1806		sprintf(buf, "Unknown error %d", err_no);
1807		return buf;
1808	}
1809	return sys_errlist[err_no];
1810}
1811
1812#endif /* !HAVE_STRERROR */
1813
1814#ifndef HAVE_STRSIGNAL
1815
1816#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
1817extern char *sys_siglist[];
1818#endif
1819#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
1820extern char *_sys_siglist[];
1821#endif
1822
1823const char *
1824strsignal(int sig)
1825{
1826	static char buf[64];
1827
1828	if (sig < 1 || sig >= NSIG) {
1829		sprintf(buf, "Unknown signal %d", sig);
1830		return buf;
1831	}
1832#ifdef HAVE__SYS_SIGLIST
1833	return _sys_siglist[sig];
1834#else
1835	return sys_siglist[sig];
1836#endif
1837}
1838
1839#endif /* HAVE_STRSIGNAL */
1840
1841#ifdef USE_PROCFS
1842
1843static void
1844rebuild_pollv(void)
1845{
1846	int i, j;
1847
1848	if (pollv != NULL)
1849		free(pollv);
1850	pollv = (struct pollfd *) malloc(nprocs * sizeof pollv[0]);
1851	if (pollv == NULL) {
1852		error_msg_and_die("Out of memory");
1853	}
1854
1855	for (i = j = 0; i < tcbtabsize; i++) {
1856		struct tcb *tcp = tcbtab[i];
1857		if (!(tcp->flags & TCB_INUSE))
1858			continue;
1859		pollv[j].fd = tcp->pfd;
1860		pollv[j].events = POLLWANT;
1861		j++;
1862	}
1863	if (j != nprocs) {
1864		error_msg_and_die("proc miscount");
1865	}
1866}
1867
1868#ifndef HAVE_POLLABLE_PROCFS
1869
1870static void
1871proc_poll_open(void)
1872{
1873	int i;
1874
1875	if (pipe(proc_poll_pipe) < 0) {
1876		perror_msg_and_die("pipe");
1877	}
1878	for (i = 0; i < 2; i++) {
1879		set_cloexec_flag(proc_poll_pipe[i]);
1880	}
1881}
1882
1883static int
1884proc_poll(struct pollfd *pollv, int nfds, int timeout)
1885{
1886	int i;
1887	int n;
1888	struct proc_pollfd pollinfo;
1889
1890	if ((n = read(proc_poll_pipe[0], &pollinfo, sizeof(pollinfo))) < 0)
1891		return n;
1892	if (n != sizeof(struct proc_pollfd)) {
1893		error_msg_and_die("panic: short read: %d", n);
1894	}
1895	for (i = 0; i < nprocs; i++) {
1896		if (pollv[i].fd == pollinfo.fd)
1897			pollv[i].revents = pollinfo.revents;
1898		else
1899			pollv[i].revents = 0;
1900	}
1901	poller_pid = pollinfo.pid;
1902	return 1;
1903}
1904
/*
 * Deliberately empty handler.  proc_poller() installs it for SIGUSR1 so
 * that the signal's only effect is to end the poller's sigsuspend().
 */
static void
wakeup_handler(int sig)
{
	(void) sig;
}
1909
1910static void
1911proc_poller(int pfd)
1912{
1913	struct proc_pollfd pollinfo;
1914	struct sigaction sa;
1915	sigset_t blocked_set, empty_set;
1916	int i;
1917	int n;
1918	struct rlimit rl;
1919#ifdef FREEBSD
1920	struct procfs_status pfs;
1921#endif /* FREEBSD */
1922
1923	switch (fork()) {
1924	case -1:
1925		perror_msg_and_die("fork");
1926	case 0:
1927		break;
1928	default:
1929		return;
1930	}
1931
1932	sa.sa_handler = interactive ? SIG_DFL : SIG_IGN;
1933	sa.sa_flags = 0;
1934	sigemptyset(&sa.sa_mask);
1935	sigaction(SIGHUP, &sa, NULL);
1936	sigaction(SIGINT, &sa, NULL);
1937	sigaction(SIGQUIT, &sa, NULL);
1938	sigaction(SIGPIPE, &sa, NULL);
1939	sigaction(SIGTERM, &sa, NULL);
1940	sa.sa_handler = wakeup_handler;
1941	sigaction(SIGUSR1, &sa, NULL);
1942	sigemptyset(&blocked_set);
1943	sigaddset(&blocked_set, SIGUSR1);
1944	sigprocmask(SIG_BLOCK, &blocked_set, NULL);
1945	sigemptyset(&empty_set);
1946
1947	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
1948		perror_msg_and_die("getrlimit(RLIMIT_NOFILE, ...)");
1949	}
1950	n = rl.rlim_cur;
1951	for (i = 0; i < n; i++) {
1952		if (i != pfd && i != proc_poll_pipe[1])
1953			close(i);
1954	}
1955
1956	pollinfo.fd = pfd;
1957	pollinfo.pid = getpid();
1958	for (;;) {
1959#ifndef FREEBSD
1960		if (ioctl(pfd, PIOCWSTOP, NULL) < 0)
1961#else
1962		if (ioctl(pfd, PIOCWSTOP, &pfs) < 0)
1963#endif
1964		{
1965			switch (errno) {
1966			case EINTR:
1967				continue;
1968			case EBADF:
1969				pollinfo.revents = POLLERR;
1970				break;
1971			case ENOENT:
1972				pollinfo.revents = POLLHUP;
1973				break;
1974			default:
1975				perror("proc_poller: PIOCWSTOP");
1976			}
1977			write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
1978			_exit(0);
1979		}
1980		pollinfo.revents = POLLWANT;
1981		write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
1982		sigsuspend(&empty_set);
1983	}
1984}
1985
1986#endif /* !HAVE_POLLABLE_PROCFS */
1987
1988static int
1989choose_pfd()
1990{
1991	int i, j;
1992	struct tcb *tcp;
1993
1994	static int last;
1995
1996	if (followfork < 2 &&
1997	    last < nprocs && (pollv[last].revents & POLLWANT)) {
1998		/*
1999		 * The previous process is ready to run again.  We'll
2000		 * let it do so if it is currently in a syscall.  This
2001		 * heuristic improves the readability of the trace.
2002		 */
2003		tcp = pfd2tcb(pollv[last].fd);
2004		if (tcp && (tcp->flags & TCB_INSYSCALL))
2005			return pollv[last].fd;
2006	}
2007
2008	for (i = 0; i < nprocs; i++) {
2009		/* Let competing children run round robin. */
2010		j = (i + last + 1) % nprocs;
2011		if (pollv[j].revents & (POLLHUP | POLLERR)) {
2012			tcp = pfd2tcb(pollv[j].fd);
2013			if (!tcp) {
2014				error_msg_and_die("lost proc");
2015			}
2016			droptcb(tcp);
2017			return -1;
2018		}
2019		if (pollv[j].revents & POLLWANT) {
2020			last = j;
2021			return pollv[j].fd;
2022		}
2023	}
2024	error_msg_and_die("nothing ready");
2025}
2026
2027static int
2028trace(void)
2029{
2030#ifdef POLL_HACK
2031	struct tcb *in_syscall = NULL;
2032#endif
2033	struct tcb *tcp;
2034	int pfd;
2035	int what;
2036	int ioctl_result = 0, ioctl_errno = 0;
2037	long arg;
2038
2039	for (;;) {
2040		if (interactive)
2041			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2042
2043		if (nprocs == 0)
2044			break;
2045
2046		switch (nprocs) {
2047		case 1:
2048#ifndef HAVE_POLLABLE_PROCFS
2049			if (proc_poll_pipe[0] == -1) {
2050#endif
2051				tcp = first_used_tcb();
2052				if (!tcp)
2053					continue;
2054				pfd = tcp->pfd;
2055				if (pfd == -1)
2056					continue;
2057				break;
2058#ifndef HAVE_POLLABLE_PROCFS
2059			}
2060			/* fall through ... */
2061#endif /* !HAVE_POLLABLE_PROCFS */
2062		default:
2063#ifdef HAVE_POLLABLE_PROCFS
2064#ifdef POLL_HACK
2065		        /* On some systems (e.g. UnixWare) we get too much ugly
2066			   "unfinished..." stuff when multiple proceses are in
2067			   syscalls.  Here's a nasty hack */
2068
2069			if (in_syscall) {
2070				struct pollfd pv;
2071				tcp = in_syscall;
2072				in_syscall = NULL;
2073				pv.fd = tcp->pfd;
2074				pv.events = POLLWANT;
2075				if ((what = poll(&pv, 1, 1)) < 0) {
2076					if (interrupted)
2077						return 0;
2078					continue;
2079				}
2080				else if (what == 1 && pv.revents & POLLWANT) {
2081					goto FOUND;
2082				}
2083			}
2084#endif
2085
2086			if (poll(pollv, nprocs, INFTIM) < 0) {
2087				if (interrupted)
2088					return 0;
2089				continue;
2090			}
2091#else /* !HAVE_POLLABLE_PROCFS */
2092			if (proc_poll(pollv, nprocs, INFTIM) < 0) {
2093				if (interrupted)
2094					return 0;
2095				continue;
2096			}
2097#endif /* !HAVE_POLLABLE_PROCFS */
2098			pfd = choose_pfd();
2099			if (pfd == -1)
2100				continue;
2101			break;
2102		}
2103
2104		/* Look up `pfd' in our table. */
2105		if ((tcp = pfd2tcb(pfd)) == NULL) {
2106			error_msg_and_die("unknown pfd: %u", pfd);
2107		}
2108#ifdef POLL_HACK
2109	FOUND:
2110#endif
2111		/* Get the status of the process. */
2112		if (!interrupted) {
2113#ifndef FREEBSD
2114			ioctl_result = IOCTL_WSTOP(tcp);
2115#else /* FREEBSD */
2116			/* Thanks to some scheduling mystery, the first poller
2117			   sometimes waits for the already processed end of fork
2118			   event. Doing a non blocking poll here solves the problem. */
2119			if (proc_poll_pipe[0] != -1)
2120				ioctl_result = IOCTL_STATUS(tcp);
2121			else
2122				ioctl_result = IOCTL_WSTOP(tcp);
2123#endif /* FREEBSD */
2124			ioctl_errno = errno;
2125#ifndef HAVE_POLLABLE_PROCFS
2126			if (proc_poll_pipe[0] != -1) {
2127				if (ioctl_result < 0)
2128					kill(poller_pid, SIGKILL);
2129				else
2130					kill(poller_pid, SIGUSR1);
2131			}
2132#endif /* !HAVE_POLLABLE_PROCFS */
2133		}
2134		if (interrupted)
2135			return 0;
2136
2137		if (interactive)
2138			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2139
2140		if (ioctl_result < 0) {
2141			/* Find out what happened if it failed. */
2142			switch (ioctl_errno) {
2143			case EINTR:
2144			case EBADF:
2145				continue;
2146#ifdef FREEBSD
2147			case ENOTTY:
2148#endif
2149			case ENOENT:
2150				droptcb(tcp);
2151				continue;
2152			default:
2153				perror_msg_and_die("PIOCWSTOP");
2154			}
2155		}
2156
2157#ifdef FREEBSD
2158		if ((tcp->flags & TCB_STARTUP) && (tcp->status.PR_WHY == PR_SYSEXIT)) {
2159			/* discard first event for a syscall we never entered */
2160			IOCTL(tcp->pfd, PIOCRUN, 0);
2161			continue;
2162		}
2163#endif
2164
2165		/* clear the just started flag */
2166		tcp->flags &= ~TCB_STARTUP;
2167
2168		/* set current output file */
2169		outf = tcp->outf;
2170		curcol = tcp->curcol;
2171
2172		if (cflag) {
2173			struct timeval stime;
2174#ifdef FREEBSD
2175			char buf[1024];
2176			int len;
2177
2178			if ((len = pread(tcp->pfd_status, buf, sizeof(buf) - 1, 0)) > 0) {
2179				buf[len] = '\0';
2180				sscanf(buf,
2181				       "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %*d,%*d %ld,%ld",
2182				       &stime.tv_sec, &stime.tv_usec);
2183			} else
2184				stime.tv_sec = stime.tv_usec = 0;
2185#else /* !FREEBSD */
2186			stime.tv_sec = tcp->status.pr_stime.tv_sec;
2187			stime.tv_usec = tcp->status.pr_stime.tv_nsec/1000;
2188#endif /* !FREEBSD */
2189			tv_sub(&tcp->dtime, &stime, &tcp->stime);
2190			tcp->stime = stime;
2191		}
2192		what = tcp->status.PR_WHAT;
2193		switch (tcp->status.PR_WHY) {
2194#ifndef FREEBSD
2195		case PR_REQUESTED:
2196			if (tcp->status.PR_FLAGS & PR_ASLEEP) {
2197				tcp->status.PR_WHY = PR_SYSENTRY;
2198				if (trace_syscall(tcp) < 0) {
2199					error_msg_and_die("syscall trouble");
2200				}
2201			}
2202			break;
2203#endif /* !FREEBSD */
2204		case PR_SYSENTRY:
2205#ifdef POLL_HACK
2206		        in_syscall = tcp;
2207#endif
2208		case PR_SYSEXIT:
2209			if (trace_syscall(tcp) < 0) {
2210				error_msg_and_die("syscall trouble");
2211			}
2212			break;
2213		case PR_SIGNALLED:
2214			if (cflag != CFLAG_ONLY_STATS
2215			    && (qual_flags[what] & QUAL_SIGNAL)) {
2216				printleader(tcp);
2217				tprintf("--- %s (%s) ---",
2218					signame(what), strsignal(what));
2219				printtrailer();
2220#ifdef PR_INFO
2221				if (tcp->status.PR_INFO.si_signo == what) {
2222					printleader(tcp);
2223					tprintf("    siginfo=");
2224					printsiginfo(&tcp->status.PR_INFO, 1);
2225					printtrailer();
2226				}
2227#endif
2228			}
2229			break;
2230		case PR_FAULTED:
2231			if (cflag != CFLAGS_ONLY_STATS
2232			    && (qual_flags[what] & QUAL_FAULT)) {
2233				printleader(tcp);
2234				tprintf("=== FAULT %d ===", what);
2235				printtrailer();
2236			}
2237			break;
2238#ifdef FREEBSD
2239		case 0: /* handle case we polled for nothing */
2240			continue;
2241#endif
2242		default:
2243			error_msg_and_die("odd stop %d", tcp->status.PR_WHY);
2244			break;
2245		}
2246		/* Remember current print column before continuing. */
2247		tcp->curcol = curcol;
2248		arg = 0;
2249#ifndef FREEBSD
2250		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
2251#else
2252		if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
2253#endif
2254		{
2255			perror_msg_and_die("PIOCRUN");
2256		}
2257	}
2258	return 0;
2259}
2260
2261#else /* !USE_PROCFS */
2262
2263static int
2264trace()
2265{
2266	int pid;
2267	int wait_errno;
2268	int status;
2269	struct tcb *tcp;
2270#ifdef LINUX
2271	struct rusage ru;
2272	struct rusage *rup = cflag ? &ru : NULL;
2273# ifdef __WALL
2274	static int wait4_options = __WALL;
2275# endif
2276#endif /* LINUX */
2277
2278	while (nprocs != 0) {
2279		if (interrupted)
2280			return 0;
2281		if (interactive)
2282			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2283#ifdef LINUX
2284# ifdef __WALL
2285		pid = wait4(-1, &status, wait4_options, rup);
2286		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
2287			/* this kernel does not support __WALL */
2288			wait4_options &= ~__WALL;
2289			pid = wait4(-1, &status, wait4_options, rup);
2290		}
2291		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
2292			/* most likely a "cloned" process */
2293			pid = wait4(-1, &status, __WCLONE, rup);
2294			if (pid < 0) {
2295				perror_msg("wait4(__WCLONE) failed");
2296			}
2297		}
2298# else
2299		pid = wait4(-1, &status, 0, rup);
2300# endif /* __WALL */
2301#endif /* LINUX */
2302#ifdef SUNOS4
2303		pid = wait(&status);
2304#endif
2305		wait_errno = errno;
2306		if (interactive)
2307			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2308
2309		if (pid < 0) {
2310			switch (wait_errno) {
2311			case EINTR:
2312				continue;
2313			case ECHILD:
2314				/*
2315				 * We would like to verify this case
2316				 * but sometimes a race in Solbourne's
2317				 * version of SunOS sometimes reports
2318				 * ECHILD before sending us SIGCHILD.
2319				 */
2320				return 0;
2321			default:
2322				errno = wait_errno;
2323				perror("strace: wait");
2324				return -1;
2325			}
2326		}
2327		if (pid == popen_pid) {
2328			if (WIFEXITED(status) || WIFSIGNALED(status))
2329				popen_pid = 0;
2330			continue;
2331		}
2332		if (debug) {
2333			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
2334#ifdef LINUX
2335			unsigned ev = (unsigned)status >> 16;
2336			if (ev) {
2337				static const char *const event_names[] = {
2338					[PTRACE_EVENT_CLONE] = "CLONE",
2339					[PTRACE_EVENT_FORK]  = "FORK",
2340					[PTRACE_EVENT_VFORK] = "VFORK",
2341					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
2342					[PTRACE_EVENT_EXEC]  = "EXEC",
2343					[PTRACE_EVENT_EXIT]  = "EXIT",
2344				};
2345				const char *e;
2346				if (ev < ARRAY_SIZE(event_names))
2347					e = event_names[ev];
2348				else {
2349					sprintf(buf, "?? (%u)", ev);
2350					e = buf;
2351				}
2352				fprintf(stderr, " PTRACE_EVENT_%s", e);
2353			}
2354#endif
2355			strcpy(buf, "???");
2356			if (WIFSIGNALED(status))
2357#ifdef WCOREDUMP
2358				sprintf(buf, "WIFSIGNALED,%ssig=%s",
2359						WCOREDUMP(status) ? "core," : "",
2360						signame(WTERMSIG(status)));
2361#else
2362				sprintf(buf, "WIFSIGNALED,sig=%s",
2363						signame(WTERMSIG(status)));
2364#endif
2365			if (WIFEXITED(status))
2366				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
2367			if (WIFSTOPPED(status))
2368				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
2369#ifdef WIFCONTINUED
2370			if (WIFCONTINUED(status))
2371				strcpy(buf, "WIFCONTINUED");
2372#endif
2373			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
2374		}
2375
2376		/* Look up `pid' in our table. */
2377		if ((tcp = pid2tcb(pid)) == NULL) {
2378#ifdef LINUX
2379			if (followfork) {
2380				/* This is needed to go with the CLONE_PTRACE
2381				   changes in process.c/util.c: we might see
2382				   the child's initial trap before we see the
2383				   parent return from the clone syscall.
2384				   Leave the child suspended until the parent
2385				   returns from its system call.  Only then
2386				   will we have the association of parent and
2387				   child so that we know how to do clearbpt
2388				   in the child.  */
2389				tcp = alloctcb(pid);
2390				tcp->flags |= TCB_ATTACHED;
2391				if (!qflag)
2392					fprintf(stderr, "Process %d attached\n",
2393						pid);
2394			}
2395			else
2396				/* This can happen if a clone call used
2397				   CLONE_PTRACE itself.  */
2398#endif
2399			{
2400				if (WIFSTOPPED(status))
2401					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
2402				error_msg_and_die("Unknown pid: %u", pid);
2403			}
2404		}
2405		/* set current output file */
2406		outf = tcp->outf;
2407		curcol = tcp->curcol;
2408#ifdef LINUX
2409		if (cflag) {
2410			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2411			tcp->stime = ru.ru_stime;
2412		}
2413#endif
2414
2415		if (WIFSIGNALED(status)) {
2416			if (pid == strace_child)
2417				exit_code = 0x100 | WTERMSIG(status);
2418			if (cflag != CFLAG_ONLY_STATS
2419			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2420				printleader(tcp);
2421#ifdef WCOREDUMP
2422				tprintf("+++ killed by %s %s+++",
2423					signame(WTERMSIG(status)),
2424					WCOREDUMP(status) ? "(core dumped) " : "");
2425#else
2426				tprintf("+++ killed by %s +++",
2427					signame(WTERMSIG(status)));
2428#endif
2429				printtrailer();
2430			}
2431			droptcb(tcp);
2432			continue;
2433		}
2434		if (WIFEXITED(status)) {
2435			if (pid == strace_child)
2436				exit_code = WEXITSTATUS(status);
2437			if (tcp == tcp_last) {
2438				if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
2439					tprintf(" <unfinished ... exit status %d>\n",
2440						WEXITSTATUS(status));
2441				tcp_last = NULL;
2442			}
2443			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
2444				printleader(tcp);
2445				tprintf("+++ exited with %d +++", WEXITSTATUS(status));
2446				printtrailer();
2447			}
2448			droptcb(tcp);
2449			continue;
2450		}
2451		if (!WIFSTOPPED(status)) {
2452			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2453			droptcb(tcp);
2454			continue;
2455		}
2456
2457		if (status >> 16) {
2458			/* Ptrace event (we ignore all of them for now) */
2459			goto tracing;
2460		}
2461
2462		/*
2463		 * Interestingly, the process may stop
2464		 * with STOPSIG equal to some other signal
2465		 * than SIGSTOP if we happened to attach
2466		 * just before the process takes a signal.
2467		 * A no-MMU vforked child won't send up a signal,
2468		 * so skip the first (lost) execve notification.
2469		 */
2470		if ((tcp->flags & TCB_STARTUP) &&
2471		    (WSTOPSIG(status) == SIGSTOP || strace_vforked)) {
2472			/*
2473			 * This flag is there to keep us in sync.
2474			 * Next time this process stops it should
2475			 * really be entering a system call.
2476			 */
2477			tcp->flags &= ~TCB_STARTUP;
2478			if (tcp->flags & TCB_BPTSET) {
2479				/*
2480				 * One example is a breakpoint inherited from
2481				 * parent through fork ().
2482				 */
2483				if (clearbpt(tcp) < 0) /* Pretty fatal */ {
2484					droptcb(tcp);
2485					cleanup();
2486					return -1;
2487				}
2488			}
2489#ifdef LINUX
2490			if (ptrace_setoptions) {
2491				if (debug)
2492					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2493				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2494					if (errno != ESRCH) {
2495						/* Should never happen, really */
2496						perror_msg_and_die("PTRACE_SETOPTIONS");
2497					}
2498				}
2499			}
2500#endif
2501			goto tracing;
2502		}
2503
2504		if (WSTOPSIG(status) != syscall_trap_sig) {
2505			if (WSTOPSIG(status) == SIGSTOP &&
2506					(tcp->flags & TCB_SIGTRAPPED)) {
2507				/*
2508				 * Trapped attempt to block SIGTRAP
2509				 * Hope we are back in control now.
2510				 */
2511				tcp->flags &= ~(TCB_INSYSCALL | TCB_SIGTRAPPED);
2512				if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2513					cleanup();
2514					return -1;
2515				}
2516				continue;
2517			}
2518			if (cflag != CFLAG_ONLY_STATS
2519			    && (qual_flags[WSTOPSIG(status)] & QUAL_SIGNAL)) {
2520				siginfo_t si;
2521#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2522				long pc = 0;
2523				long psr = 0;
2524
2525				upeek(tcp, PT_CR_IPSR, &psr);
2526				upeek(tcp, PT_CR_IIP, &pc);
2527
2528# define PSR_RI	41
2529				pc += (psr >> PSR_RI) & 0x3;
2530# define PC_FORMAT_STR	" @ %lx"
2531# define PC_FORMAT_ARG	pc
2532#else
2533# define PC_FORMAT_STR	"%s"
2534# define PC_FORMAT_ARG	""
2535#endif
2536				printleader(tcp);
2537				if (ptrace(PTRACE_GETSIGINFO, pid, 0, &si) == 0) {
2538					tprintf("--- ");
2539					printsiginfo(&si, verbose(tcp));
2540					tprintf(" (%s)" PC_FORMAT_STR " ---",
2541						strsignal(WSTOPSIG(status)),
2542						PC_FORMAT_ARG);
2543				} else
2544					tprintf("--- %s by %s" PC_FORMAT_STR " ---",
2545						strsignal(WSTOPSIG(status)),
2546						signame(WSTOPSIG(status)),
2547						PC_FORMAT_ARG);
2548				printtrailer();
2549			}
2550			if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
2551				cleanup();
2552				return -1;
2553			}
2554			continue;
2555		}
2556		/* we handled the STATUS, we are permitted to interrupt now. */
2557		if (interrupted)
2558			return 0;
2559		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2560			/* ptrace() failed in trace_syscall() with ESRCH.
2561			 * Likely a result of process disappearing mid-flight.
2562			 * Observed case: exit_group() terminating
2563			 * all processes in thread group. In this case, threads
2564			 * "disappear" in an unpredictable moment without any
2565			 * notification to strace via wait().
2566			 */
2567			if (tcp->flags & TCB_ATTACHED) {
2568				if (tcp_last) {
2569					/* Do we have dangling line "syscall(param, param"?
2570					 * Finish the line then.
2571					 */
2572					tcp_last->flags |= TCB_REPRINT;
2573					tprintf(" <unfinished ...>");
2574					printtrailer();
2575				}
2576				detach(tcp, 0);
2577			} else {
2578				ptrace(PTRACE_KILL,
2579					tcp->pid, (char *) 1, SIGTERM);
2580				droptcb(tcp);
2581			}
2582			continue;
2583		}
2584	tracing:
2585		/* Remember current print column before continuing. */
2586		tcp->curcol = curcol;
2587		if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) {
2588			cleanup();
2589			return -1;
2590		}
2591	}
2592	return 0;
2593}
2594
2595#endif /* !USE_PROCFS */
2596
2597void
2598tprintf(const char *fmt, ...)
2599{
2600	va_list args;
2601
2602	va_start(args, fmt);
2603	if (outf) {
2604		int n = vfprintf(outf, fmt, args);
2605		if (n < 0) {
2606			if (outf != stderr)
2607				perror(outfname == NULL
2608				       ? "<writing to pipe>" : outfname);
2609		} else
2610			curcol += n;
2611	}
2612	va_end(args);
2613	return;
2614}
2615
2616void
2617printleader(struct tcb *tcp)
2618{
2619	if (tcp_last) {
2620		if (tcp_last->ptrace_errno) {
2621			if (tcp_last->flags & TCB_INSYSCALL) {
2622				tprintf(" <unavailable>) ");
2623				tabto(acolumn);
2624			}
2625			tprintf("= ? <unavailable>\n");
2626			tcp_last->ptrace_errno = 0;
2627		} else if (!outfname || followfork < 2 || tcp_last == tcp) {
2628			tcp_last->flags |= TCB_REPRINT;
2629			tprintf(" <unfinished ...>\n");
2630		}
2631	}
2632	curcol = 0;
2633	if ((followfork == 1 || pflag_seen > 1) && outfname)
2634		tprintf("%-5d ", tcp->pid);
2635	else if (nprocs > 1 && !outfname)
2636		tprintf("[pid %5u] ", tcp->pid);
2637	if (tflag) {
2638		char str[sizeof("HH:MM:SS")];
2639		struct timeval tv, dtv;
2640		static struct timeval otv;
2641
2642		gettimeofday(&tv, NULL);
2643		if (rflag) {
2644			if (otv.tv_sec == 0)
2645				otv = tv;
2646			tv_sub(&dtv, &tv, &otv);
2647			tprintf("%6ld.%06ld ",
2648				(long) dtv.tv_sec, (long) dtv.tv_usec);
2649			otv = tv;
2650		}
2651		else if (tflag > 2) {
2652			tprintf("%ld.%06ld ",
2653				(long) tv.tv_sec, (long) tv.tv_usec);
2654		}
2655		else {
2656			time_t local = tv.tv_sec;
2657			strftime(str, sizeof(str), "%T", localtime(&local));
2658			if (tflag > 1)
2659				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
2660			else
2661				tprintf("%s ", str);
2662		}
2663	}
2664	if (iflag)
2665		printcall(tcp);
2666}
2667
2668void
2669tabto(int col)
2670{
2671	if (curcol < col)
2672		tprintf("%*s", col - curcol, "");
2673}
2674
2675void
2676printtrailer(void)
2677{
2678	tprintf("\n");
2679	tcp_last = NULL;
2680}
2681
2682#ifdef HAVE_MP_PROCFS
2683
/*
 * Send a request to `fd' as a single gathered write: the command word
 * `cmd' first, followed by `size' bytes taken from `arg' when `arg' is
 * non-NULL.  `size' is ignored when `arg' is NULL.
 *
 * Returns the result of writev(): the number of bytes written, or -1
 * with errno set.  A negative `size' together with a non-NULL `arg' is
 * rejected with errno = EINVAL before anything is written, instead of
 * being converted to a huge unsigned length and passed to writev().
 */
int
mp_ioctl(int fd, int cmd, void *arg, int size)
{
	struct iovec iov[2];
	int iovcnt = 1;

	iov[0].iov_base = &cmd;
	iov[0].iov_len = sizeof(cmd);

	if (arg) {
		if (size < 0) {
			errno = EINVAL;
			return -1;
		}
		iov[1].iov_base = arg;
		iov[1].iov_len = (size_t) size;
		iovcnt = 2;
	}

	return writev(fd, iov, iovcnt);
}
2700
2701#endif
2702