strace.c revision 4f3df078b26899afe0f25d8651b06a5a5b5143e2
1/*
2 * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3 * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4 * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5 * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 *	$Id$
31 */
32
33#include "defs.h"
34
35#include <sys/types.h>
36#include <stdarg.h>
37#include <signal.h>
38#include <errno.h>
39#include <sys/param.h>
40#include <fcntl.h>
41#include <sys/resource.h>
42#include <sys/wait.h>
43#include <sys/stat.h>
44#include <pwd.h>
45#include <grp.h>
46#include <string.h>
47#include <dirent.h>
48#include <sys/utsname.h>
49
50#ifdef LINUX
51# include <asm/unistd.h>
52# if defined __NR_tkill
53#  define my_tkill(tid, sig) syscall(__NR_tkill, (tid), (sig))
54# else
   /* kill() may arbitrarily choose the target task of the process group,
      while we later wait on that specific TID.  PID process waits become
      TID task specific waits for a process under ptrace(2).  */
58#  warning "Neither tkill(2) nor tgkill(2) available, risk of strace hangs!"
59#  define my_tkill(tid, sig) kill((tid), (sig))
60# endif
61#endif
62
63#if defined(IA64) && defined(LINUX)
64# include <asm/ptrace_offsets.h>
65#endif
66
67#ifdef USE_PROCFS
68#include <poll.h>
69#endif
70
71#ifdef SVR4
72#include <sys/stropts.h>
73#ifdef HAVE_MP_PROCFS
74#ifdef HAVE_SYS_UIO_H
75#include <sys/uio.h>
76#endif
77#endif
78#endif
79extern char **environ;
80extern int optind;
81extern char *optarg;
82
83
84int debug = 0, followfork = 0;
85unsigned int ptrace_setoptions = 0;
86/* Which WSTOPSIG(status) value marks syscall traps? */
87static unsigned int syscall_trap_sig = SIGTRAP;
88int dtime = 0, xflag = 0, qflag = 0;
89cflag_t cflag = CFLAG_NONE;
90static int iflag = 0, pflag_seen = 0, rflag = 0, tflag = 0;
91
92/* -I n */
93enum {
94    INTR_NOT_SET        = 0,
95    INTR_ANYWHERE       = 1, /* don't block/ignore any signals */
96    INTR_WHILE_WAIT     = 2, /* block fatal signals while decoding syscall. default */
97    INTR_NEVER          = 3, /* block fatal signals. default if '-o FILE PROG' */
98    INTR_BLOCK_TSTP_TOO = 4, /* block fatal signals and SIGTSTP (^Z) */
99    NUM_INTR_OPTS
100};
101static int opt_intr;
102/* We play with signal mask only if this mode is active: */
103#define interactive (opt_intr == INTR_WHILE_WAIT)
104
105/*
106 * daemonized_tracer supports -D option.
107 * With this option, strace forks twice.
108 * Unlike normal case, with -D *grandparent* process exec's,
109 * becoming a traced process. Child exits (this prevents traced process
110 * from having children it doesn't expect to have), and grandchild
111 * attaches to grandparent similarly to strace -p PID.
112 * This allows for more transparent interaction in cases
113 * when process and its parent are communicating via signals,
114 * wait() etc. Without -D, strace process gets lodged in between,
115 * disrupting parent<->child link.
116 */
117static bool daemonized_tracer = 0;
118
119#ifdef USE_SEIZE
120static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
121# define use_seize (post_attach_sigstop == 0)
122#else
123# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
124# define use_seize 0
125#endif
126
127/* Sometimes we want to print only succeeding syscalls. */
128int not_failing_only = 0;
129
130/* Show path associated with fd arguments */
131int show_fd_path = 0;
132
133/* are we filtering traces based on paths? */
134int tracing_paths = 0;
135
136static int exit_code = 0;
137static int strace_child = 0;
138static int strace_tracer_pid = 0;
139
140static char *username = NULL;
141static uid_t run_uid;
142static gid_t run_gid;
143
144int max_strlen = DEFAULT_STRLEN;
145static int acolumn = DEFAULT_ACOLUMN;
146static char *acolumn_spaces;
147static char *outfname = NULL;
148static FILE *outf;
149struct tcb *printing_tcp = NULL;
150static int curcol;
151static struct tcb **tcbtab;
152static unsigned int nprocs, tcbtabsize;
153static const char *progname;
154
155static char *os_release; /* from uname() */
156
157static int detach(struct tcb *tcp);
158static int trace(void);
159static void cleanup(void);
160static void interrupt(int sig);
161static sigset_t empty_set, blocked_set;
162
163#ifdef HAVE_SIG_ATOMIC_T
164static volatile sig_atomic_t interrupted;
165#else
166static volatile int interrupted;
167#endif
168
#ifdef USE_PROCFS

/* Forward declarations and state used only by the procfs-based tracer. */
static struct tcb *pfd2tcb(int pfd);
static void reaper(int sig);
static void rebuild_pollv(void);
static struct pollfd *pollv;

# ifndef HAVE_POLLABLE_PROCFS

static void proc_poll_open(void);
static void proc_poller(int pfd);

struct proc_pollfd {
	int fd;
	int revents;
	int pid;
};

static int poller_pid;
static int proc_poll_pipe[2] = { -1, -1 };

# endif /* !HAVE_POLLABLE_PROCFS */

/* poll(2) event bit selected as POLLWANT for this procfs flavour. */
# ifndef HAVE_MP_PROCFS
#  define POLLWANT	POLLPRI
# else
#  define POLLWANT	POLLWRNORM
# endif
#endif /* USE_PROCFS */
198
199static void
200usage(FILE *ofp, int exitval)
201{
202	fprintf(ofp, "\
203usage: strace [-CdDffhiqrtttTvVxxy] [-I n] [-a column] [-e expr]... [-o file]\n\
204              [-p pid]... [-s strsize] [-u username] [-E var=val]...\n\
205              [-P path] [PROG [ARGS]]]\n\
206   or: strace -c [-D] [-I n] [-e expr]... [-O overhead] [-S sortby] [-E var=val]...\n\
207              [PROG [ARGS]]]\n\
208-c -- count time, calls, and errors for each syscall and report summary\n\
209-C -- like -c but also print regular output while processes are running\n\
210-D -- run tracer process as a detached grandchild, not as parent\n\
211-f -- follow forks, -ff -- with output into separate files\n\
212-F -- attempt to follow vforks, -h -- print help message\n\
213-i -- print instruction pointer at time of syscall\n\
214-I interruptible\n\
215   1: no signals are blocked\n\
216   2: fatal signals are blocked while decoding syscall (default)\n\
217   3: fatal signals are always blocked (default if '-o FILE PROG')\n\
218   4: fatal signals and SIGTSTP (^Z) are always blocked\n\
219      (useful to make 'strace -o FILE PROG' not stop on ^Z)\n\
220-q -- suppress messages about attaching, detaching, etc.\n\
221-r -- print relative timestamp, -t -- absolute timestamp, -tt -- with usecs\n\
222-T -- print time spent in each syscall, -V -- print version\n\
223-v -- verbose mode: print unabbreviated argv, stat, termio[s], etc. args\n\
224-x -- print non-ascii strings in hex, -xx -- print all strings in hex\n\
225-y -- print paths associated with file descriptor arguments\n\
226-a column -- alignment COLUMN for printing syscall results (default %d)\n\
227-e expr -- a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
228   options: trace, abbrev, verbose, raw, signal, read, or write\n\
229-o file -- send trace output to FILE instead of stderr\n\
230-O overhead -- set overhead for tracing syscalls to OVERHEAD usecs\n\
231-p pid -- trace process with process id PID, may be repeated\n\
232-s strsize -- limit length of print strings to STRSIZE chars (default %d)\n\
233-S sortby -- sort syscall counts by: time, calls, name, nothing (default %s)\n\
234-u username -- run command as username handling setuid and/or setgid\n\
235-E var=val -- put var=val in the environment for command\n\
236-E var -- remove var from the environment for command\n\
237-P path -- trace accesses to path\n\
238" /* this is broken, so don't document it
239-z -- print only succeeding syscalls\n\
240  */
241, DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
242	exit(exitval);
243}
244
245static void die(void) __attribute__ ((noreturn));
246static void die(void)
247{
248	if (strace_tracer_pid == getpid()) {
249		cflag = 0;
250		cleanup();
251	}
252	exit(1);
253}
254
255static void verror_msg(int err_no, const char *fmt, va_list p)
256{
257	char *msg;
258
259	fflush(NULL);
260
261	/* We want to print entire message with single fprintf to ensure
262	 * message integrity if stderr is shared with other programs.
263	 * Thus we use vasprintf + single fprintf.
264	 */
265	msg = NULL;
266	if (vasprintf(&msg, fmt, p) >= 0) {
267		if (err_no)
268			fprintf(stderr, "%s: %s: %s\n", progname, msg, strerror(err_no));
269		else
270			fprintf(stderr, "%s: %s\n", progname, msg);
271		free(msg);
272	} else {
273		/* malloc in vasprintf failed, try it without malloc */
274		fprintf(stderr, "%s: ", progname);
275		vfprintf(stderr, fmt, p);
276		if (err_no)
277			fprintf(stderr, ": %s\n", strerror(err_no));
278		else
279			putc('\n', stderr);
280	}
281	/* We don't switch stderr to buffered, thus fprintf(stderr)
282	 * always flushes its output and this is not necessary: */
283	/* fflush(stderr); */
284}
285
/* Print a printf-style message via verror_msg() with no errno text. */
void error_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	va_end(args);
}
293
/*
 * Print a printf-style message via verror_msg() with no errno text,
 * then terminate through die().  Does not return.
 */
void error_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(0, fmt, args);
	die();
}
301
/*
 * Print a printf-style message via verror_msg(), passing the current
 * errno so its strerror() text is appended.
 */
void perror_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	va_end(args);
}
309
/*
 * Print a printf-style message via verror_msg(), passing the current
 * errno, then terminate through die().  Does not return.
 */
void perror_msg_and_die(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror_msg(errno, fmt, args);
	die();
}
317
/*
 * Report "Out of memory" and terminate.  If this function is entered
 * a second time, it exits immediately with status 1 without printing
 * again.
 */
void die_out_of_memory(void)
{
	static bool already_dying = 0;

	if (!already_dying) {
		already_dying = 1;
		error_msg_and_die("Out of memory");
	}
	exit(1);
}
326
327#ifdef SVR4
328#ifdef MIPS
/*
 * Deliberately empty.  Installed as the SIGINT handler by the
 * SVR4/MIPS kludge in startup_child().
 */
void
foobar()
{
	return;
}
333#endif /* MIPS */
334#endif /* SVR4 */
335
/*
 * Systems without an MMU cannot provide fork(): on those, fork() is
 * mapped to vfork() and strace_vforked is 1 so callers can adapt.
 */
#ifndef HAVE_FORK
# define strace_vforked 1
# define fork()         vfork()
#else
# define strace_vforked 0
#endif
343
#ifdef USE_SEIZE
/*
 * Attach to PID.  When use_seize is set this issues PTRACE_SEIZE
 * followed by PTRACE_INTERRUPT; otherwise it issues PTRACE_ATTACH.
 * Returns the result of the last ptrace() call made (non-zero from
 * PTRACE_SEIZE is returned immediately).
 */
static int
ptrace_attach_or_seize(int pid)
{
	if (use_seize) {
		int rc = ptrace(PTRACE_SEIZE, pid, 0, PTRACE_SEIZE_DEVEL);

		if (rc != 0)
			return rc;
		rc = ptrace(PTRACE_INTERRUPT, pid, 0, 0);
		return rc;
	}
	return ptrace(PTRACE_ATTACH, pid, 0, 0);
}
#else
# define ptrace_attach_or_seize(pid) ptrace(PTRACE_ATTACH, (pid), 0, 0)
#endif
360
/*
 * Make sure FD_CLOEXEC is set on FD.
 *
 * A failing F_GETFD can only mean the descriptor is bad, which is a
 * bug in the caller, so the process is aborted with a message rather
 * than returning an error.
 */
static void
set_cloexec_flag(int fd)
{
	int current = fcntl(fd, F_GETFD);

	if (current < 0)
		perror_msg_and_die("fcntl(%d, F_GETFD)", fd);

	/* Already set: nothing to do. */
	if ((current & FD_CLOEXEC) == FD_CLOEXEC)
		return;

	fcntl(fd, F_SETFD, current | FD_CLOEXEC); /* never fails */
}
382
/*
 * Exchange the real and effective user ids (no-op when they are equal
 * or when built for SVR4).  Needed around filesystem and process
 * management operations when strace is a setuid executable.
 * Dies if setreuid() fails.
 */
static void
swap_uid(void)
{
#ifndef SVR4
	const int effective = geteuid();
	const int real = getuid();

	if (effective == real)
		return;
	if (setreuid(effective, real) < 0) {
		perror_msg_and_die("setreuid");
	}
#endif
}
398
/* Use the 64-bit-offset fopen variant when _LFS64_LARGEFILE is set. */
#if !_LFS64_LARGEFILE
# define fopen_for_output fopen
#else
# define fopen_for_output fopen64
#endif

/*
 * Open PATH for writing as an output file.  The open is bracketed by
 * swap_uid() calls, and the resulting descriptor gets FD_CLOEXEC.
 * Dies with a message if the file cannot be opened; never returns NULL.
 */
static FILE *
strace_fopen(const char *path)
{
	FILE *stream;

	swap_uid();
	stream = fopen_for_output(path, "w");
	if (stream == NULL)
		perror_msg_and_die("Can't fopen '%s'", path);
	swap_uid();

	set_cloexec_flag(fileno(stream));
	return stream;
}
418
/* Pid of the process created by strace_popen(); 0 if none. */
static int popen_pid = 0;

#ifndef _PATH_BSHELL
# define _PATH_BSHELL "/bin/sh"
#endif

/*
 * Run COMMAND through the shell with its stdin connected to a pipe and
 * return a stream writing into that pipe.
 *
 * Standard popen(3) cannot be used: it does not export its child's
 * pid, and we must be able to tell the popen child apart from the
 * processes we trace.  The child's pid is stored in popen_pid.
 *
 * Dies on pipe/vfork/exec failure; never returns NULL.
 */
static FILE *
strace_popen(const char *command)
{
	enum { RD = 0, WR = 1 };
	int pipe_fds[2];
	FILE *to_child;

	swap_uid();
	if (pipe(pipe_fds) < 0)
		perror_msg_and_die("pipe");

	set_cloexec_flag(pipe_fds[WR]); /* never fails */

	popen_pid = vfork();
	if (popen_pid == -1)
		perror_msg_and_die("vfork");

	if (popen_pid == 0) {
		/* child: make the read end of the pipe our stdin */
		close(pipe_fds[WR]);
		if (pipe_fds[RD] != 0) {
			if (dup2(pipe_fds[RD], 0) != 0)
				perror_msg_and_die("dup2");
			close(pipe_fds[RD]);
		}
		execl(_PATH_BSHELL, "sh", "-c", command, NULL);
		perror_msg_and_die("Can't execute '%s'", _PATH_BSHELL);
	}

	/* parent: keep only the write end */
	close(pipe_fds[RD]);
	swap_uid();
	to_child = fdopen(pipe_fds[WR], "w");
	if (to_child == NULL)
		die_out_of_memory();
	return to_child;
}
466
467static void
468newoutf(struct tcb *tcp)
469{
470	if (outfname && followfork > 1) {
471		char name[520 + sizeof(int) * 3];
472		sprintf(name, "%.512s.%u", outfname, tcp->pid);
473		tcp->outf = strace_fopen(name);
474	}
475}
476
477static void
478startup_attach(void)
479{
480	int tcbi;
481	struct tcb *tcp;
482
483	/*
484	 * Block user interruptions as we would leave the traced
485	 * process stopped (process state T) if we would terminate in
486	 * between PTRACE_ATTACH and wait4() on SIGSTOP.
487	 * We rely on cleanup() from this point on.
488	 */
489	if (interactive)
490		sigprocmask(SIG_BLOCK, &blocked_set, NULL);
491
492	if (daemonized_tracer) {
493		pid_t pid = fork();
494		if (pid < 0) {
495			perror_msg_and_die("fork");
496		}
497		if (pid) { /* parent */
498			/*
499			 * Wait for grandchild to attach to straced process
500			 * (grandparent). Grandchild SIGKILLs us after it attached.
501			 * Grandparent's wait() is unblocked by our death,
502			 * it proceeds to exec the straced program.
503			 */
504			pause();
505			_exit(0); /* paranoia */
506		}
507		/* grandchild */
508		/* We will be the tracer process. Remember our new pid: */
509		strace_tracer_pid = getpid();
510	}
511
512	for (tcbi = 0; tcbi < tcbtabsize; tcbi++) {
513		tcp = tcbtab[tcbi];
514
515		/* Is this a process we should attach to, but not yet attached? */
516		if ((tcp->flags & (TCB_ATTACHED | TCB_STARTUP)) != TCB_ATTACHED)
517			continue; /* no */
518
519		/* Reinitialize the output since it may have changed */
520		tcp->outf = outf;
521		newoutf(tcp);
522
523#ifdef USE_PROCFS
524		if (proc_open(tcp, 1) < 0) {
525			fprintf(stderr, "trouble opening proc file\n");
526			droptcb(tcp);
527			continue;
528		}
529#else /* !USE_PROCFS */
530# ifdef LINUX
531		if (followfork && !daemonized_tracer) {
532			char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
533			DIR *dir;
534
535			sprintf(procdir, "/proc/%d/task", tcp->pid);
536			dir = opendir(procdir);
537			if (dir != NULL) {
538				unsigned int ntid = 0, nerr = 0;
539				struct dirent *de;
540
541				while ((de = readdir(dir)) != NULL) {
542					struct tcb *cur_tcp;
543					int tid;
544
545					if (de->d_fileno == 0)
546						continue;
547					tid = atoi(de->d_name);
548					if (tid <= 0)
549						continue;
550					++ntid;
551					if (ptrace_attach_or_seize(tid) < 0) {
552						++nerr;
553						if (debug)
554							fprintf(stderr, "attach to pid %d failed\n", tid);
555						continue;
556					}
557					if (debug)
558						fprintf(stderr, "attach to pid %d succeeded\n", tid);
559					cur_tcp = tcp;
560					if (tid != tcp->pid)
561						cur_tcp = alloctcb(tid);
562					cur_tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
563				}
564				closedir(dir);
565				if (interactive) {
566					sigprocmask(SIG_SETMASK, &empty_set, NULL);
567					if (interrupted)
568						goto ret;
569					sigprocmask(SIG_BLOCK, &blocked_set, NULL);
570				}
571				ntid -= nerr;
572				if (ntid == 0) {
573					perror("attach: ptrace(PTRACE_ATTACH, ...)");
574					droptcb(tcp);
575					continue;
576				}
577				if (!qflag) {
578					fprintf(stderr, ntid > 1
579? "Process %u attached with %u threads - interrupt to quit\n"
580: "Process %u attached - interrupt to quit\n",
581						tcp->pid, ntid);
582				}
583				if (!(tcp->flags & TCB_STARTUP)) {
584					/* -p PID, we failed to attach to PID itself
585					 * but did attach to some of its sibling threads.
586					 * Drop PID's tcp.
587					 */
588					droptcb(tcp);
589				}
590				continue;
591			} /* if (opendir worked) */
592		} /* if (-f) */
593# endif /* LINUX */
594		if (ptrace_attach_or_seize(tcp->pid) < 0) {
595			perror("attach: ptrace(PTRACE_ATTACH, ...)");
596			droptcb(tcp);
597			continue;
598		}
599		tcp->flags |= TCB_STARTUP | post_attach_sigstop;
600		if (debug)
601			fprintf(stderr, "attach to pid %d (main) succeeded\n", tcp->pid);
602
603		if (daemonized_tracer) {
604			/*
605			 * It is our grandparent we trace, not a -p PID.
606			 * Don't want to just detach on exit, so...
607			 */
608			tcp->flags &= ~TCB_ATTACHED;
609			/*
610			 * Make parent go away.
611			 * Also makes grandparent's wait() unblock.
612			 */
613			kill(getppid(), SIGKILL);
614		}
615
616#endif /* !USE_PROCFS */
617		if (!qflag)
618			fprintf(stderr,
619				"Process %u attached - interrupt to quit\n",
620				tcp->pid);
621	} /* for each tcbtab[] */
622
623 ret:
624	if (interactive)
625		sigprocmask(SIG_SETMASK, &empty_set, NULL);
626}
627
628static void
629startup_child(char **argv)
630{
631	struct stat statbuf;
632	const char *filename;
633	char pathname[MAXPATHLEN];
634	int pid = 0;
635	struct tcb *tcp;
636
637	filename = argv[0];
638	if (strchr(filename, '/')) {
639		if (strlen(filename) > sizeof pathname - 1) {
640			errno = ENAMETOOLONG;
641			perror_msg_and_die("exec");
642		}
643		strcpy(pathname, filename);
644	}
645#ifdef USE_DEBUGGING_EXEC
646	/*
647	 * Debuggers customarily check the current directory
648	 * first regardless of the path but doing that gives
649	 * security geeks a panic attack.
650	 */
651	else if (stat(filename, &statbuf) == 0)
652		strcpy(pathname, filename);
653#endif /* USE_DEBUGGING_EXEC */
654	else {
655		const char *path;
656		int m, n, len;
657
658		for (path = getenv("PATH"); path && *path; path += m) {
659			const char *colon = strchr(path, ':');
660			if (colon) {
661				n = colon - path;
662				m = n + 1;
663			}
664			else
665				m = n = strlen(path);
666			if (n == 0) {
667				if (!getcwd(pathname, MAXPATHLEN))
668					continue;
669				len = strlen(pathname);
670			}
671			else if (n > sizeof pathname - 1)
672				continue;
673			else {
674				strncpy(pathname, path, n);
675				len = n;
676			}
677			if (len && pathname[len - 1] != '/')
678				pathname[len++] = '/';
679			strcpy(pathname + len, filename);
680			if (stat(pathname, &statbuf) == 0 &&
681			    /* Accept only regular files
682			       with some execute bits set.
683			       XXX not perfect, might still fail */
684			    S_ISREG(statbuf.st_mode) &&
685			    (statbuf.st_mode & 0111))
686				break;
687		}
688	}
689	if (stat(pathname, &statbuf) < 0) {
690		perror_msg_and_die("Can't stat '%s'", filename);
691	}
692	strace_child = pid = fork();
693	if (pid < 0) {
694		perror_msg_and_die("fork");
695	}
696	if ((pid != 0 && daemonized_tracer) /* -D: parent to become a traced process */
697	 || (pid == 0 && !daemonized_tracer) /* not -D: child to become a traced process */
698	) {
699		pid = getpid();
700		if (outf != stderr)
701			close(fileno(outf));
702#ifdef USE_PROCFS
703# ifdef MIPS
704		/* Kludge for SGI, see proc_open for details. */
705		sa.sa_handler = foobar;
706		sa.sa_flags = 0;
707		sigemptyset(&sa.sa_mask);
708		sigaction(SIGINT, &sa, NULL);
709# endif
710# ifndef FREEBSD
711		pause();
712# else
713		kill(pid, SIGSTOP);
714# endif
715#else /* !USE_PROCFS */
716		if (!daemonized_tracer && !use_seize) {
717			if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0) {
718				perror_msg_and_die("ptrace(PTRACE_TRACEME, ...)");
719			}
720		}
721
722		if (username != NULL) {
723			uid_t run_euid = run_uid;
724			gid_t run_egid = run_gid;
725
726			if (statbuf.st_mode & S_ISUID)
727				run_euid = statbuf.st_uid;
728			if (statbuf.st_mode & S_ISGID)
729				run_egid = statbuf.st_gid;
730			/*
731			 * It is important to set groups before we
732			 * lose privileges on setuid.
733			 */
734			if (initgroups(username, run_gid) < 0) {
735				perror_msg_and_die("initgroups");
736			}
737			if (setregid(run_gid, run_egid) < 0) {
738				perror_msg_and_die("setregid");
739			}
740			if (setreuid(run_uid, run_euid) < 0) {
741				perror_msg_and_die("setreuid");
742			}
743		}
744		else if (geteuid() != 0)
745			setreuid(run_uid, run_uid);
746
747		if (!daemonized_tracer) {
748			/*
749			 * Induce a ptrace stop. Tracer (our parent)
750			 * will resume us with PTRACE_SYSCALL and display
751			 * the immediately following execve syscall.
752			 * Can't do this on NOMMU systems, we are after
753			 * vfork: parent is blocked, stopping would deadlock.
754			 */
755			if (!strace_vforked)
756				kill(pid, SIGSTOP);
757		} else {
758			struct sigaction sv_sigchld;
759			sigaction(SIGCHLD, NULL, &sv_sigchld);
760			/*
761			 * Make sure it is not SIG_IGN, otherwise wait
762			 * will not block.
763			 */
764			signal(SIGCHLD, SIG_DFL);
765			/*
766			 * Wait for grandchild to attach to us.
767			 * It kills child after that, and wait() unblocks.
768			 */
769			alarm(3);
770			wait(NULL);
771			alarm(0);
772			sigaction(SIGCHLD, &sv_sigchld, NULL);
773		}
774#endif /* !USE_PROCFS */
775
776		execv(pathname, argv);
777		perror_msg_and_die("exec");
778	}
779
780	/* We are the tracer */
781
782	if (!daemonized_tracer) {
783		if (!use_seize) {
784			/* child did PTRACE_TRACEME, nothing to do in parent */
785		} else {
786			if (!strace_vforked) {
787				/* Wait until child stopped itself */
788				int status;
789				while (waitpid(pid, &status, WSTOPPED) < 0) {
790					if (errno == EINTR)
791						continue;
792					perror_msg_and_die("waitpid");
793				}
794				if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
795					kill(pid, SIGKILL);
796					perror_msg_and_die("Unexpected wait status %x", status);
797				}
798			}
799			/* Else: vforked case, we have no way to sync.
800			 * Just attach to it as soon as possible.
801			 * This means that we may miss a few first syscalls...
802			 */
803
804			if (ptrace_attach_or_seize(pid)) {
805				kill(pid, SIGKILL);
806				perror_msg_and_die("Can't attach to %d", pid);
807			}
808			if (!strace_vforked)
809				kill(pid, SIGCONT);
810		}
811		tcp = alloctcb(pid);
812		if (!strace_vforked)
813			tcp->flags |= TCB_STARTUP | post_attach_sigstop;
814		else
815			tcp->flags |= TCB_STARTUP;
816	}
817	else {
818		/* With -D, *we* are child here, IOW: different pid. Fetch it: */
819		strace_tracer_pid = getpid();
820		/* The tracee is our parent: */
821		pid = getppid();
822		tcp = alloctcb(pid);
823		/* We want subsequent startup_attach() to attach to it: */
824		tcp->flags |= TCB_ATTACHED;
825	}
826#ifdef USE_PROCFS
827	if (proc_open(tcp, 0) < 0) {
828		perror_msg_and_die("trouble opening proc file");
829	}
830#endif
831}
832
833#ifdef LINUX
/*
 * Send signal SIG to PID, ignoring the result, and leave errno exactly
 * as it was on entry.
 */
static void kill_save_errno(pid_t pid, int sig)
{
	const int errno_on_entry = errno;

	(void) kill(pid, sig);
	errno = errno_on_entry;
}
841
842/*
843 * Test whether the kernel support PTRACE_O_TRACECLONE et al options.
844 * First fork a new child, call ptrace with PTRACE_SETOPTIONS on it,
845 * and then see which options are supported by the kernel.
846 */
847static void
848test_ptrace_setoptions_followfork(void)
849{
850	int pid, expected_grandchild = 0, found_grandchild = 0;
851	const unsigned int test_options = PTRACE_O_TRACECLONE |
852					  PTRACE_O_TRACEFORK |
853					  PTRACE_O_TRACEVFORK;
854
855	pid = fork();
856	if (pid < 0)
857		perror_msg_and_die("fork");
858	if (pid == 0) {
859		pid = getpid();
860		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
861			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
862					   __func__);
863		kill(pid, SIGSTOP);
864		if (fork() < 0)
865			perror_msg_and_die("fork");
866		_exit(0);
867	}
868
869	while (1) {
870		int status, tracee_pid;
871
872		errno = 0;
873		tracee_pid = wait(&status);
874		if (tracee_pid <= 0) {
875			if (errno == EINTR)
876				continue;
877			else if (errno == ECHILD)
878				break;
879			kill_save_errno(pid, SIGKILL);
880			perror_msg_and_die("%s: unexpected wait result %d",
881					   __func__, tracee_pid);
882		}
883		if (WIFEXITED(status)) {
884			if (WEXITSTATUS(status)) {
885				if (tracee_pid != pid)
886					kill_save_errno(pid, SIGKILL);
887				error_msg_and_die("%s: unexpected exit status %u",
888						  __func__, WEXITSTATUS(status));
889			}
890			continue;
891		}
892		if (WIFSIGNALED(status)) {
893			if (tracee_pid != pid)
894				kill_save_errno(pid, SIGKILL);
895			error_msg_and_die("%s: unexpected signal %u",
896					  __func__, WTERMSIG(status));
897		}
898		if (!WIFSTOPPED(status)) {
899			if (tracee_pid != pid)
900				kill_save_errno(tracee_pid, SIGKILL);
901			kill(pid, SIGKILL);
902			error_msg_and_die("%s: unexpected wait status %x",
903					  __func__, status);
904		}
905		if (tracee_pid != pid) {
906			found_grandchild = tracee_pid;
907			if (ptrace(PTRACE_CONT, tracee_pid, 0, 0) < 0) {
908				kill_save_errno(tracee_pid, SIGKILL);
909				kill_save_errno(pid, SIGKILL);
910				perror_msg_and_die("PTRACE_CONT doesn't work");
911			}
912			continue;
913		}
914		switch (WSTOPSIG(status)) {
915		case SIGSTOP:
916			if (ptrace(PTRACE_SETOPTIONS, pid, 0, test_options) < 0
917			    && errno != EINVAL && errno != EIO)
918				perror_msg("PTRACE_SETOPTIONS");
919			break;
920		case SIGTRAP:
921			if (status >> 16 == PTRACE_EVENT_FORK) {
922				long msg = 0;
923
924				if (ptrace(PTRACE_GETEVENTMSG, pid,
925					   NULL, (long) &msg) == 0)
926					expected_grandchild = msg;
927			}
928			break;
929		}
930		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
931			kill_save_errno(pid, SIGKILL);
932			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
933		}
934	}
935	if (expected_grandchild && expected_grandchild == found_grandchild) {
936		ptrace_setoptions |= test_options;
937		if (debug)
938			fprintf(stderr, "ptrace_setoptions = %#x\n",
939				ptrace_setoptions);
940		return;
941	}
942	error_msg("Test for PTRACE_O_TRACECLONE failed, "
943		  "giving up using this feature.");
944}
945
946/*
947 * Test whether the kernel support PTRACE_O_TRACESYSGOOD.
948 * First fork a new child, call ptrace(PTRACE_SETOPTIONS) on it,
949 * and then see whether it will stop with (SIGTRAP | 0x80).
950 *
951 * Use of this option enables correct handling of user-generated SIGTRAPs,
952 * and SIGTRAPs generated by special instructions such as int3 on x86:
953 * _start:	.globl	_start
954 *		int3
955 *		movl	$42, %ebx
956 *		movl	$1, %eax
957 *		int	$0x80
958 * (compile with: "gcc -nostartfiles -nostdlib -o int3 int3.S")
959 */
960static void
961test_ptrace_setoptions_for_all(void)
962{
963	const unsigned int test_options = PTRACE_O_TRACESYSGOOD |
964					  PTRACE_O_TRACEEXEC;
965	int pid;
966	int it_worked = 0;
967
968	pid = fork();
969	if (pid < 0)
970		perror_msg_and_die("fork");
971
972	if (pid == 0) {
973		pid = getpid();
974		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) < 0)
975			/* Note: exits with exitcode 1 */
976			perror_msg_and_die("%s: PTRACE_TRACEME doesn't work",
977					   __func__);
978		kill(pid, SIGSTOP);
979		_exit(0); /* parent should see entry into this syscall */
980	}
981
982	while (1) {
983		int status, tracee_pid;
984
985		errno = 0;
986		tracee_pid = wait(&status);
987		if (tracee_pid <= 0) {
988			if (errno == EINTR)
989				continue;
990			kill_save_errno(pid, SIGKILL);
991			perror_msg_and_die("%s: unexpected wait result %d",
992					   __func__, tracee_pid);
993		}
994		if (WIFEXITED(status)) {
995			if (WEXITSTATUS(status) == 0)
996				break;
997			error_msg_and_die("%s: unexpected exit status %u",
998					  __func__, WEXITSTATUS(status));
999		}
1000		if (WIFSIGNALED(status)) {
1001			error_msg_and_die("%s: unexpected signal %u",
1002					  __func__, WTERMSIG(status));
1003		}
1004		if (!WIFSTOPPED(status)) {
1005			kill(pid, SIGKILL);
1006			error_msg_and_die("%s: unexpected wait status %x",
1007					  __func__, status);
1008		}
1009		if (WSTOPSIG(status) == SIGSTOP) {
1010			/*
1011			 * We don't check "options aren't accepted" error.
1012			 * If it happens, we'll never get (SIGTRAP | 0x80),
1013			 * and thus will decide to not use the option.
1014			 * IOW: the outcome of the test will be correct.
1015			 */
1016			if (ptrace(PTRACE_SETOPTIONS, pid, 0L, test_options) < 0
1017			    && errno != EINVAL && errno != EIO)
1018				perror_msg("PTRACE_SETOPTIONS");
1019		}
1020		if (WSTOPSIG(status) == (SIGTRAP | 0x80)) {
1021			it_worked = 1;
1022		}
1023		if (ptrace(PTRACE_SYSCALL, pid, 0L, 0L) < 0) {
1024			kill_save_errno(pid, SIGKILL);
1025			perror_msg_and_die("PTRACE_SYSCALL doesn't work");
1026		}
1027	}
1028
1029	if (it_worked) {
1030		syscall_trap_sig = (SIGTRAP | 0x80);
1031		ptrace_setoptions |= test_options;
1032		if (debug)
1033			fprintf(stderr, "ptrace_setoptions = %#x\n",
1034				ptrace_setoptions);
1035		return;
1036	}
1037
1038	error_msg("Test for PTRACE_O_TRACESYSGOOD failed, "
1039		  "giving up using this feature.");
1040}
1041
# ifndef USE_SEIZE
#  define test_ptrace_seize() ((void)0)
# else /* USE_SEIZE */
/*
 * Probe whether PTRACE_SEIZE works: fork a child that just pauses, try
 * to seize it, then kill and reap it.  On success post_attach_sigstop
 * is cleared, which makes use_seize evaluate to 1.
 */
static void
test_ptrace_seize(void)
{
	int pid;

	pid = fork();
	if (pid < 0)
		perror_msg_and_die("fork");

	if (pid == 0) {
		pause();
		_exit(0);
	}

	/* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap.  After
	 * attaching tracee continues to run unless a trap condition occurs.
	 * PTRACE_SEIZE doesn't affect signal or group stop state.
	 */
	if (ptrace(PTRACE_SEIZE, pid, 0, PTRACE_SEIZE_DEVEL) == 0) {
		post_attach_sigstop = 0; /* this sets use_seize to 1 */
	} else if (debug) {
		fprintf(stderr, "PTRACE_SEIZE doesn't work\n");
	}

	kill(pid, SIGKILL);

	/* Reap the child; only EINTR makes us go around again. */
	for (;;) {
		int status;
		int tracee_pid;

		errno = 0;
		tracee_pid = waitpid(pid, &status, 0);
		if (tracee_pid <= 0) {
			if (errno == EINTR)
				continue;
			perror_msg_and_die("%s: unexpected wait result %d",
					 __func__, tracee_pid);
		}
		if (!WIFSIGNALED(status))
			error_msg_and_die("%s: unexpected wait status %x",
					__func__, status);
		return;
	}
}
# endif /* USE_SEIZE */
1090
1091#endif
1092
1093/* Noinline: don't want main to have struct utsname permanently on stack */
1094static void __attribute__ ((noinline))
1095get_os_release(void)
1096{
1097	struct utsname u;
1098	if (uname(&u) < 0)
1099		perror_msg_and_die("uname");
1100	os_release = strdup(u.release);
1101	if (!os_release)
1102		die_out_of_memory();
1103}
1104
1105int
1106main(int argc, char *argv[])
1107{
1108	struct tcb *tcp;
1109	int c, pid = 0;
1110	int optF = 0;
1111	struct sigaction sa;
1112
1113	progname = argv[0] ? argv[0] : "strace";
1114
1115	strace_tracer_pid = getpid();
1116
1117	get_os_release();
1118
1119	/* Allocate the initial tcbtab.  */
1120	tcbtabsize = argc;	/* Surely enough for all -p args.  */
1121	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
1122	if (!tcbtab)
1123		die_out_of_memory();
1124	tcp = calloc(tcbtabsize, sizeof(*tcp));
1125	if (!tcp)
1126		die_out_of_memory();
1127	for (c = 0; c < tcbtabsize; c++)
1128		tcbtab[c] = tcp++;
1129
1130	outf = stderr;
1131	set_sortby(DEFAULT_SORTBY);
1132	set_personality(DEFAULT_PERSONALITY);
1133	qualify("trace=all");
1134	qualify("abbrev=all");
1135	qualify("verbose=all");
1136	qualify("signal=all");
1137	while ((c = getopt(argc, argv,
1138		"+cCdfFhiqrtTvVxyz"
1139#ifndef USE_PROCFS
1140		"D"
1141#endif
1142		"a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
1143		switch (c) {
1144		case 'c':
1145			if (cflag == CFLAG_BOTH) {
1146				error_msg_and_die("-c and -C are mutually exclusive options");
1147			}
1148			cflag = CFLAG_ONLY_STATS;
1149			break;
1150		case 'C':
1151			if (cflag == CFLAG_ONLY_STATS) {
1152				error_msg_and_die("-c and -C are mutually exclusive options");
1153			}
1154			cflag = CFLAG_BOTH;
1155			break;
1156		case 'd':
1157			debug++;
1158			break;
1159#ifndef USE_PROCFS
1160		case 'D':
1161			daemonized_tracer = 1;
1162			break;
1163#endif
1164		case 'F':
1165			optF = 1;
1166			break;
1167		case 'f':
1168			followfork++;
1169			break;
1170		case 'h':
1171			usage(stdout, 0);
1172			break;
1173		case 'i':
1174			iflag++;
1175			break;
1176		case 'q':
1177			qflag++;
1178			break;
1179		case 'r':
1180			rflag++;
1181			tflag++;
1182			break;
1183		case 't':
1184			tflag++;
1185			break;
1186		case 'T':
1187			dtime++;
1188			break;
1189		case 'x':
1190			xflag++;
1191			break;
1192		case 'y':
1193			show_fd_path = 1;
1194			break;
1195		case 'v':
1196			qualify("abbrev=none");
1197			break;
1198		case 'V':
1199			printf("%s -- version %s\n", PACKAGE_NAME, VERSION);
1200			exit(0);
1201			break;
1202		case 'z':
1203			not_failing_only = 1;
1204			break;
1205		case 'a':
1206			acolumn = atoi(optarg);
1207			if (acolumn < 0)
1208				error_msg_and_die("Bad column width '%s'", optarg);
1209			break;
1210		case 'e':
1211			qualify(optarg);
1212			break;
1213		case 'o':
1214			outfname = strdup(optarg);
1215			break;
1216		case 'O':
1217			set_overhead(atoi(optarg));
1218			break;
1219		case 'p':
1220			pid = atoi(optarg);
1221			if (pid <= 0) {
1222				error_msg("Invalid process id: '%s'", optarg);
1223				break;
1224			}
1225			if (pid == strace_tracer_pid) {
1226				error_msg("I'm sorry, I can't let you do that, Dave.");
1227				break;
1228			}
1229			tcp = alloc_tcb(pid, 0);
1230			tcp->flags |= TCB_ATTACHED;
1231			pflag_seen++;
1232			break;
1233		case 'P':
1234			tracing_paths = 1;
1235			if (pathtrace_select(optarg)) {
1236				error_msg_and_die("Failed to select path '%s'", optarg);
1237			}
1238			break;
1239		case 's':
1240			max_strlen = atoi(optarg);
1241			if (max_strlen < 0) {
1242				error_msg_and_die("Invalid -%c argument: '%s'", c, optarg);
1243			}
1244			break;
1245		case 'S':
1246			set_sortby(optarg);
1247			break;
1248		case 'u':
1249			username = strdup(optarg);
1250			break;
1251		case 'E':
1252			if (putenv(optarg) < 0)
1253				die_out_of_memory();
1254			break;
1255		case 'I':
1256			opt_intr = atoi(optarg);
1257			if (opt_intr <= 0 || opt_intr >= NUM_INTR_OPTS) {
1258				error_msg_and_die("Invalid -%c argument: '%s'", c, optarg);
1259			}
1260			break;
1261		default:
1262			usage(stderr, 1);
1263			break;
1264		}
1265	}
1266	argv += optind;
1267	/* argc -= optind; - no need, argc is not used below */
1268
1269	acolumn_spaces = malloc(acolumn + 1);
1270	if (!acolumn_spaces)
1271		die_out_of_memory();
1272	memset(acolumn_spaces, ' ', acolumn);
1273	acolumn_spaces[acolumn] = '\0';
1274
1275	/* Must have PROG [ARGS], or -p PID. Not both. */
1276	if (!argv[0] == !pflag_seen)
1277		usage(stderr, 1);
1278
1279	if (pflag_seen && daemonized_tracer) {
1280		error_msg_and_die("-D and -p are mutually exclusive options");
1281	}
1282
1283	if (!followfork)
1284		followfork = optF;
1285
1286	if (followfork > 1 && cflag) {
1287		error_msg_and_die("(-c or -C) and -ff are mutually exclusive options");
1288	}
1289
1290	/* See if they want to run as another user. */
1291	if (username != NULL) {
1292		struct passwd *pent;
1293
1294		if (getuid() != 0 || geteuid() != 0) {
1295			error_msg_and_die("You must be root to use the -u option");
1296		}
1297		pent = getpwnam(username);
1298		if (pent == NULL) {
1299			error_msg_and_die("Cannot find user '%s'", username);
1300		}
1301		run_uid = pent->pw_uid;
1302		run_gid = pent->pw_gid;
1303	}
1304	else {
1305		run_uid = getuid();
1306		run_gid = getgid();
1307	}
1308
1309#ifdef LINUX
1310	if (followfork)
1311		test_ptrace_setoptions_followfork();
1312	test_ptrace_setoptions_for_all();
1313	test_ptrace_seize();
1314#endif
1315
1316	/* Check if they want to redirect the output. */
1317	if (outfname) {
1318		/* See if they want to pipe the output. */
1319		if (outfname[0] == '|' || outfname[0] == '!') {
1320			/*
1321			 * We can't do the <outfname>.PID funny business
1322			 * when using popen, so prohibit it.
1323			 */
1324			if (followfork > 1)
1325				error_msg_and_die("Piping the output and -ff are mutually exclusive");
1326			outf = strace_popen(outfname + 1);
1327		}
1328		else if (followfork <= 1)
1329			outf = strace_fopen(outfname);
1330	}
1331
1332	if (!outfname || outfname[0] == '|' || outfname[0] == '!') {
1333		char *buf = malloc(BUFSIZ);
1334		if (!buf)
1335			die_out_of_memory();
1336		setvbuf(outf, buf, _IOLBF, BUFSIZ);
1337	}
1338	if (outfname && argv[0]) {
1339		if (!opt_intr)
1340			opt_intr = INTR_NEVER;
1341		qflag = 1;
1342	}
1343	if (!opt_intr)
1344		opt_intr = INTR_WHILE_WAIT;
1345
1346	/* argv[0]	-pPID	-oFILE	Default interactive setting
1347	 * yes		0	0	INTR_WHILE_WAIT
1348	 * no		1	0	INTR_WHILE_WAIT
1349	 * yes		0	1	INTR_NEVER
1350	 * no		1	1	INTR_WHILE_WAIT
1351	 */
1352
1353	/* STARTUP_CHILD must be called before the signal handlers get
1354	   installed below as they are inherited into the spawned process.
1355	   Also we do not need to be protected by them as during interruption
1356	   in the STARTUP_CHILD mode we kill the spawned process anyway.  */
1357	if (argv[0])
1358		startup_child(argv);
1359
1360	sigemptyset(&empty_set);
1361	sigemptyset(&blocked_set);
1362	sa.sa_handler = SIG_IGN;
1363	sigemptyset(&sa.sa_mask);
1364	sa.sa_flags = 0;
1365	sigaction(SIGTTOU, &sa, NULL); /* SIG_IGN */
1366	sigaction(SIGTTIN, &sa, NULL); /* SIG_IGN */
1367	if (opt_intr != INTR_ANYWHERE) {
1368		if (opt_intr == INTR_BLOCK_TSTP_TOO)
1369			sigaction(SIGTSTP, &sa, NULL); /* SIG_IGN */
1370		/*
1371		 * In interactive mode (if no -o OUTFILE, or -p PID is used),
1372		 * fatal signals are blocked while syscall stop is processed,
1373		 * and acted on in between, when waiting for new syscall stops.
1374		 * In non-interactive mode, signals are ignored.
1375		 */
1376		if (opt_intr == INTR_WHILE_WAIT) {
1377			sigaddset(&blocked_set, SIGHUP);
1378			sigaddset(&blocked_set, SIGINT);
1379			sigaddset(&blocked_set, SIGQUIT);
1380			sigaddset(&blocked_set, SIGPIPE);
1381			sigaddset(&blocked_set, SIGTERM);
1382			sa.sa_handler = interrupt;
1383#ifdef SUNOS4
1384			/* POSIX signals on sunos4.1 are a little broken. */
1385			sa.sa_flags = SA_INTERRUPT;
1386#endif
1387		}
1388		/* SIG_IGN, or set handler for these */
1389		sigaction(SIGHUP, &sa, NULL);
1390		sigaction(SIGINT, &sa, NULL);
1391		sigaction(SIGQUIT, &sa, NULL);
1392		sigaction(SIGPIPE, &sa, NULL);
1393		sigaction(SIGTERM, &sa, NULL);
1394	}
1395#ifdef USE_PROCFS
1396	sa.sa_handler = reaper;
1397	sigaction(SIGCHLD, &sa, NULL);
1398#else
1399	/* Make sure SIGCHLD has the default action so that waitpid
1400	   definitely works without losing track of children.  The user
1401	   should not have given us a bogus state to inherit, but he might
1402	   have.  Arguably we should detect SIG_IGN here and pass it on
1403	   to children, but probably noone really needs that.  */
1404	sa.sa_handler = SIG_DFL;
1405	sigaction(SIGCHLD, &sa, NULL);
1406#endif /* USE_PROCFS */
1407
1408	if (pflag_seen || daemonized_tracer)
1409		startup_attach();
1410
1411	if (trace() < 0)
1412		exit(1);
1413
1414	cleanup();
1415	fflush(NULL);
1416	if (exit_code > 0xff) {
1417		/* Child was killed by a signal, mimic that.  */
1418		exit_code &= 0xff;
1419		signal(exit_code, SIG_DFL);
1420		raise(exit_code);
1421		/* Paranoia - what if this signal is not fatal?
1422		   Exit with 128 + signo then.  */
1423		exit_code += 128;
1424	}
1425	exit(exit_code);
1426}
1427
1428static void
1429expand_tcbtab(void)
1430{
1431	/* Allocate some more TCBs and expand the table.
1432	   We don't want to relocate the TCBs because our
1433	   callers have pointers and it would be a pain.
1434	   So tcbtab is a table of pointers.  Since we never
1435	   free the TCBs, we allocate a single chunk of many.  */
1436	int i = tcbtabsize;
1437	struct tcb *newtcbs = calloc(tcbtabsize, sizeof(newtcbs[0]));
1438	struct tcb **newtab = realloc(tcbtab, tcbtabsize * 2 * sizeof(tcbtab[0]));
1439	if (!newtab || !newtcbs)
1440		die_out_of_memory();
1441	tcbtabsize *= 2;
1442	tcbtab = newtab;
1443	while (i < tcbtabsize)
1444		tcbtab[i++] = newtcbs++;
1445}
1446
1447struct tcb *
1448alloc_tcb(int pid, int command_options_parsed)
1449{
1450	int i;
1451	struct tcb *tcp;
1452
1453	if (nprocs == tcbtabsize)
1454		expand_tcbtab();
1455
1456	for (i = 0; i < tcbtabsize; i++) {
1457		tcp = tcbtab[i];
1458		if ((tcp->flags & TCB_INUSE) == 0) {
1459			memset(tcp, 0, sizeof(*tcp));
1460			tcp->pid = pid;
1461			tcp->flags = TCB_INUSE;
1462			tcp->outf = outf; /* Initialise to current out file */
1463#if SUPPORTED_PERSONALITIES > 1
1464			tcp->currpers = current_personality;
1465#endif
1466#ifdef USE_PROCFS
1467			tcp->pfd = -1;
1468#endif
1469			nprocs++;
1470			if (debug)
1471				fprintf(stderr, "new tcb for pid %d, active tcbs:%d\n", tcp->pid, nprocs);
1472			if (command_options_parsed)
1473				newoutf(tcp);
1474			return tcp;
1475		}
1476	}
1477	error_msg_and_die("bug in alloc_tcb");
1478}
1479
1480#ifdef USE_PROCFS
1481int
1482proc_open(struct tcb *tcp, int attaching)
1483{
1484	char proc[32];
1485	long arg;
1486#ifdef SVR4
1487	int i;
1488	sysset_t syscalls;
1489	sigset_t signals;
1490	fltset_t faults;
1491#endif
1492#ifndef HAVE_POLLABLE_PROCFS
1493	static int last_pfd;
1494#endif
1495
1496#ifdef HAVE_MP_PROCFS
1497	/* Open the process pseudo-files in /proc. */
1498	sprintf(proc, "/proc/%d/ctl", tcp->pid);
1499	tcp->pfd = open(proc, O_WRONLY|O_EXCL);
1500	if (tcp->pfd < 0) {
1501		perror("strace: open(\"/proc/...\", ...)");
1502		return -1;
1503	}
1504	set_cloexec_flag(tcp->pfd);
1505	sprintf(proc, "/proc/%d/status", tcp->pid);
1506	tcp->pfd_stat = open(proc, O_RDONLY|O_EXCL);
1507	if (tcp->pfd_stat < 0) {
1508		perror("strace: open(\"/proc/...\", ...)");
1509		return -1;
1510	}
1511	set_cloexec_flag(tcp->pfd_stat);
1512	sprintf(proc, "/proc/%d/as", tcp->pid);
1513	tcp->pfd_as = open(proc, O_RDONLY|O_EXCL);
1514	if (tcp->pfd_as < 0) {
1515		perror("strace: open(\"/proc/...\", ...)");
1516		return -1;
1517	}
1518	set_cloexec_flag(tcp->pfd_as);
1519#else
1520	/* Open the process pseudo-file in /proc. */
1521# ifndef FREEBSD
1522	sprintf(proc, "/proc/%d", tcp->pid);
1523	tcp->pfd = open(proc, O_RDWR|O_EXCL);
1524# else
1525	sprintf(proc, "/proc/%d/mem", tcp->pid);
1526	tcp->pfd = open(proc, O_RDWR);
1527# endif
1528	if (tcp->pfd < 0) {
1529		perror("strace: open(\"/proc/...\", ...)");
1530		return -1;
1531	}
1532	set_cloexec_flag(tcp->pfd);
1533#endif
1534#ifdef FREEBSD
1535	sprintf(proc, "/proc/%d/regs", tcp->pid);
1536	tcp->pfd_reg = open(proc, O_RDONLY);
1537	if (tcp->pfd_reg < 0) {
1538		perror("strace: open(\"/proc/.../regs\", ...)");
1539		return -1;
1540	}
1541	if (cflag) {
1542		sprintf(proc, "/proc/%d/status", tcp->pid);
1543		tcp->pfd_status = open(proc, O_RDONLY);
1544		if (tcp->pfd_status < 0) {
1545			perror("strace: open(\"/proc/.../status\", ...)");
1546			return -1;
1547		}
1548	} else
1549		tcp->pfd_status = -1;
1550#endif /* FREEBSD */
1551	rebuild_pollv();
1552	if (!attaching) {
1553		/*
1554		 * Wait for the child to pause.  Because of a race
1555		 * condition we have to poll for the event.
1556		 */
1557		for (;;) {
1558			if (IOCTL_STATUS(tcp) < 0) {
1559				perror("strace: PIOCSTATUS");
1560				return -1;
1561			}
1562			if (tcp->status.PR_FLAGS & PR_ASLEEP)
1563				break;
1564		}
1565	}
1566#ifndef FREEBSD
1567	/* Stop the process so that we own the stop. */
1568	if (IOCTL(tcp->pfd, PIOCSTOP, (char *)NULL) < 0) {
1569		perror("strace: PIOCSTOP");
1570		return -1;
1571	}
1572#endif
1573#ifdef PIOCSET
1574	/* Set Run-on-Last-Close. */
1575	arg = PR_RLC;
1576	if (IOCTL(tcp->pfd, PIOCSET, &arg) < 0) {
1577		perror("PIOCSET PR_RLC");
1578		return -1;
1579	}
1580	/* Set or Reset Inherit-on-Fork. */
1581	arg = PR_FORK;
1582	if (IOCTL(tcp->pfd, followfork ? PIOCSET : PIOCRESET, &arg) < 0) {
1583		perror("PIOC{SET,RESET} PR_FORK");
1584		return -1;
1585	}
1586#else  /* !PIOCSET */
1587#ifndef FREEBSD
1588	if (ioctl(tcp->pfd, PIOCSRLC) < 0) {
1589		perror("PIOCSRLC");
1590		return -1;
1591	}
1592	if (ioctl(tcp->pfd, followfork ? PIOCSFORK : PIOCRFORK) < 0) {
1593		perror("PIOC{S,R}FORK");
1594		return -1;
1595	}
1596#else /* FREEBSD */
1597	/* just unset the PF_LINGER flag for the Run-on-Last-Close. */
1598	if (ioctl(tcp->pfd, PIOCGFL, &arg) < 0) {
1599	        perror("PIOCGFL");
1600		return -1;
1601	}
1602	arg &= ~PF_LINGER;
1603	if (ioctl(tcp->pfd, PIOCSFL, arg) < 0) {
1604		perror("PIOCSFL");
1605		return -1;
1606	}
1607#endif /* FREEBSD */
1608#endif /* !PIOCSET */
1609#ifndef FREEBSD
1610	/* Enable all syscall entries we care about. */
1611	premptyset(&syscalls);
1612	for (i = 1; i < MAX_QUALS; ++i) {
1613		if (i > (sizeof syscalls) * CHAR_BIT) break;
1614		if (qual_flags[i] & QUAL_TRACE) praddset(&syscalls, i);
1615	}
1616	praddset(&syscalls, SYS_execve);
1617	if (followfork) {
1618		praddset(&syscalls, SYS_fork);
1619#ifdef SYS_forkall
1620		praddset(&syscalls, SYS_forkall);
1621#endif
1622#ifdef SYS_fork1
1623		praddset(&syscalls, SYS_fork1);
1624#endif
1625#ifdef SYS_rfork1
1626		praddset(&syscalls, SYS_rfork1);
1627#endif
1628#ifdef SYS_rforkall
1629		praddset(&syscalls, SYS_rforkall);
1630#endif
1631	}
1632	if (IOCTL(tcp->pfd, PIOCSENTRY, &syscalls) < 0) {
1633		perror("PIOCSENTRY");
1634		return -1;
1635	}
1636	/* Enable the syscall exits. */
1637	if (IOCTL(tcp->pfd, PIOCSEXIT, &syscalls) < 0) {
1638		perror("PIOSEXIT");
1639		return -1;
1640	}
1641	/* Enable signals we care about. */
1642	premptyset(&signals);
1643	for (i = 1; i < MAX_QUALS; ++i) {
1644		if (i > (sizeof signals) * CHAR_BIT) break;
1645		if (qual_flags[i] & QUAL_SIGNAL) praddset(&signals, i);
1646	}
1647	if (IOCTL(tcp->pfd, PIOCSTRACE, &signals) < 0) {
1648		perror("PIOCSTRACE");
1649		return -1;
1650	}
1651	/* Enable faults we care about */
1652	premptyset(&faults);
1653	for (i = 1; i < MAX_QUALS; ++i) {
1654		if (i > (sizeof faults) * CHAR_BIT) break;
1655		if (qual_flags[i] & QUAL_FAULT) praddset(&faults, i);
1656	}
1657	if (IOCTL(tcp->pfd, PIOCSFAULT, &faults) < 0) {
1658		perror("PIOCSFAULT");
1659		return -1;
1660	}
1661#else /* FREEBSD */
1662	/* set events flags. */
1663	arg = S_SIG | S_SCE | S_SCX;
1664	if (ioctl(tcp->pfd, PIOCBIS, arg) < 0) {
1665		perror("PIOCBIS");
1666		return -1;
1667	}
1668#endif /* FREEBSD */
1669	if (!attaching) {
1670#ifdef MIPS
1671		/*
1672		 * The SGI PRSABORT doesn't work for pause() so
1673		 * we send it a caught signal to wake it up.
1674		 */
1675		kill(tcp->pid, SIGINT);
1676#else /* !MIPS */
1677#ifdef PRSABORT
1678		/* The child is in a pause(), abort it. */
1679		arg = PRSABORT;
1680		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0) {
1681			perror("PIOCRUN");
1682			return -1;
1683		}
1684#endif
1685#endif /* !MIPS*/
1686#ifdef FREEBSD
1687		/* wake up the child if it received the SIGSTOP */
1688		kill(tcp->pid, SIGCONT);
1689#endif
1690		for (;;) {
1691			/* Wait for the child to do something. */
1692			if (IOCTL_WSTOP(tcp) < 0) {
1693				perror("PIOCWSTOP");
1694				return -1;
1695			}
1696			if (tcp->status.PR_WHY == PR_SYSENTRY) {
1697				tcp->flags &= ~TCB_INSYSCALL;
1698				get_scno(tcp);
1699				if (known_scno(tcp) == SYS_execve)
1700					break;
1701			}
1702			/* Set it running: maybe execve will be next. */
1703#ifndef FREEBSD
1704			arg = 0;
1705			if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
1706#else
1707			if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
1708#endif
1709			{
1710				perror("PIOCRUN");
1711				return -1;
1712			}
1713#ifdef FREEBSD
1714			/* handle the case where we "opened" the child before
1715			   it did the kill -STOP */
1716			if (tcp->status.PR_WHY == PR_SIGNALLED &&
1717			    tcp->status.PR_WHAT == SIGSTOP)
1718			        kill(tcp->pid, SIGCONT);
1719#endif
1720		}
1721	}
1722#ifdef FREEBSD
1723	else {
1724		if (attaching < 2) {
1725			/* We are attaching to an already running process.
1726			 * Try to figure out the state of the process in syscalls,
1727			 * to handle the first event well.
1728			 * This is done by having a look at the "wchan" property of the
1729			 * process, which tells where it is stopped (if it is). */
1730			FILE * status;
1731			char wchan[20]; /* should be enough */
1732
1733			sprintf(proc, "/proc/%d/status", tcp->pid);
1734			status = fopen(proc, "r");
1735			if (status &&
1736			    (fscanf(status, "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d"
1737				    "%*d,%*d %*d,%*d %19s", wchan) == 1) &&
1738			    strcmp(wchan, "nochan") && strcmp(wchan, "spread") &&
1739			    strcmp(wchan, "stopevent")) {
1740				/* The process is asleep in the middle of a syscall.
1741				   Fake the syscall entry event */
1742				tcp->flags &= ~(TCB_INSYSCALL|TCB_STARTUP);
1743				tcp->status.PR_WHY = PR_SYSENTRY;
1744				trace_syscall(tcp);
1745			}
1746			if (status)
1747				fclose(status);
1748		} /* otherwise it's a fork being followed */
1749	}
1750#endif /* FREEBSD */
1751#ifndef HAVE_POLLABLE_PROCFS
1752	if (proc_poll_pipe[0] != -1)
1753		proc_poller(tcp->pfd);
1754	else if (nprocs > 1) {
1755		proc_poll_open();
1756		proc_poller(last_pfd);
1757		proc_poller(tcp->pfd);
1758	}
1759	last_pfd = tcp->pfd;
1760#endif /* !HAVE_POLLABLE_PROCFS */
1761	return 0;
1762}
1763
1764#endif /* USE_PROCFS */
1765
1766static struct tcb *
1767pid2tcb(int pid)
1768{
1769	int i;
1770
1771	if (pid <= 0)
1772		return NULL;
1773
1774	for (i = 0; i < tcbtabsize; i++) {
1775		struct tcb *tcp = tcbtab[i];
1776		if (tcp->pid == pid && (tcp->flags & TCB_INUSE))
1777			return tcp;
1778	}
1779
1780	return NULL;
1781}
1782
1783#ifdef USE_PROCFS
1784
1785static struct tcb *
1786first_used_tcb(void)
1787{
1788	int i;
1789	struct tcb *tcp;
1790	for (i = 0; i < tcbtabsize; i++) {
1791		tcp = tcbtab[i];
1792		if (tcp->flags & TCB_INUSE)
1793			return tcp;
1794	}
1795	return NULL;
1796}
1797
1798static struct tcb *
1799pfd2tcb(int pfd)
1800{
1801	int i;
1802
1803	for (i = 0; i < tcbtabsize; i++) {
1804		struct tcb *tcp = tcbtab[i];
1805		if (tcp->pfd != pfd)
1806			continue;
1807		if (tcp->flags & TCB_INUSE)
1808			return tcp;
1809	}
1810	return NULL;
1811}
1812
1813#endif /* USE_PROCFS */
1814
1815void
1816droptcb(struct tcb *tcp)
1817{
1818	if (tcp->pid == 0)
1819		return;
1820
1821	nprocs--;
1822	if (debug)
1823		fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
1824
1825#ifdef USE_PROCFS
1826	if (tcp->pfd != -1) {
1827		close(tcp->pfd);
1828		tcp->pfd = -1;
1829# ifdef FREEBSD
1830		if (tcp->pfd_reg != -1) {
1831		        close(tcp->pfd_reg);
1832		        tcp->pfd_reg = -1;
1833		}
1834		if (tcp->pfd_status != -1) {
1835			close(tcp->pfd_status);
1836			tcp->pfd_status = -1;
1837		}
1838# endif
1839		tcp->flags = 0; /* rebuild_pollv needs it */
1840		rebuild_pollv();
1841	}
1842#endif
1843
1844	if (outfname && followfork > 1 && tcp->outf)
1845		fclose(tcp->outf);
1846
1847	memset(tcp, 0, sizeof(*tcp));
1848}
1849
1850/* detach traced process; continue with sig
1851   Never call DETACH twice on the same process as both unattached and
1852   attached-unstopped processes give the same ESRCH.  For unattached process we
1853   would SIGSTOP it and wait for its SIGSTOP notification forever.  */
1854
1855static int
1856detach(struct tcb *tcp)
1857{
1858	int error = 0;
1859#ifdef LINUX
1860	int status, catch_sigstop;
1861#endif
1862
1863	if (tcp->flags & TCB_BPTSET)
1864		clearbpt(tcp);
1865
1866#ifdef LINUX
1867	/*
1868	 * Linux wrongly insists the child be stopped
1869	 * before detaching.  Arghh.  We go through hoops
1870	 * to make a clean break of things.
1871	 */
1872#if defined(SPARC)
1873#undef PTRACE_DETACH
1874#define PTRACE_DETACH PTRACE_SUNDETACH
1875#endif
1876	/*
1877	 * We attached but possibly didn't see the expected SIGSTOP.
1878	 * We must catch exactly one as otherwise the detached process
1879	 * would be left stopped (process state T).
1880	 */
1881	catch_sigstop = (tcp->flags & TCB_IGNORE_ONE_SIGSTOP);
1882	error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, 0);
1883	if (error == 0) {
1884		/* On a clear day, you can see forever. */
1885	}
1886	else if (errno != ESRCH) {
1887		/* Shouldn't happen. */
1888		perror("detach: ptrace(PTRACE_DETACH, ...)");
1889	}
1890	else if (my_tkill(tcp->pid, 0) < 0) {
1891		if (errno != ESRCH)
1892			perror("detach: checking sanity");
1893	}
1894	else if (!catch_sigstop && my_tkill(tcp->pid, SIGSTOP) < 0) {
1895		if (errno != ESRCH)
1896			perror("detach: stopping child");
1897	}
1898	else
1899		catch_sigstop = 1;
1900	if (catch_sigstop) {
1901		for (;;) {
1902#ifdef __WALL
1903			if (wait4(tcp->pid, &status, __WALL, NULL) < 0) {
1904				if (errno == ECHILD) /* Already gone.  */
1905					break;
1906				if (errno != EINVAL) {
1907					perror("detach: waiting");
1908					break;
1909				}
1910#endif /* __WALL */
1911				/* No __WALL here.  */
1912				if (waitpid(tcp->pid, &status, 0) < 0) {
1913					if (errno != ECHILD) {
1914						perror("detach: waiting");
1915						break;
1916					}
1917#ifdef __WCLONE
1918					/* If no processes, try clones.  */
1919					if (wait4(tcp->pid, &status, __WCLONE,
1920						  NULL) < 0) {
1921						if (errno != ECHILD)
1922							perror("detach: waiting");
1923						break;
1924					}
1925#endif /* __WCLONE */
1926				}
1927#ifdef __WALL
1928			}
1929#endif
1930			if (!WIFSTOPPED(status)) {
1931				/* Au revoir, mon ami. */
1932				break;
1933			}
1934			if (WSTOPSIG(status) == SIGSTOP) {
1935				ptrace_restart(PTRACE_DETACH, tcp, 0);
1936				break;
1937			}
1938			error = ptrace_restart(PTRACE_CONT, tcp,
1939					WSTOPSIG(status) == syscall_trap_sig ? 0
1940					: WSTOPSIG(status));
1941			if (error < 0)
1942				break;
1943		}
1944	}
1945#endif /* LINUX */
1946
1947#if defined(SUNOS4)
1948	/* PTRACE_DETACH won't respect `sig' argument, so we post it here. */
1949	error = ptrace_restart(PTRACE_DETACH, tcp, 0);
1950#endif /* SUNOS4 */
1951
1952	if (!qflag)
1953		fprintf(stderr, "Process %u detached\n", tcp->pid);
1954
1955	droptcb(tcp);
1956
1957	return error;
1958}
1959
1960#ifdef USE_PROCFS
1961
/*
 * SIGCHLD handler: reap every child that has already terminated,
 * without blocking.
 *
 * waitpid() sets errno (for example to ECHILD once nothing is left),
 * so errno is saved and restored.  Otherwise the code this handler
 * interrupted could see a clobbered errno after its own failing call.
 */
static void reaper(int sig)
{
	int saved_errno = errno;
	int status;

	(void)sig;

	while (waitpid(-1, &status, WNOHANG) > 0)
		continue;

	errno = saved_errno;
}
1970
1971#endif /* USE_PROCFS */
1972
1973static void
1974cleanup(void)
1975{
1976	int i;
1977	struct tcb *tcp;
1978	int fatal_sig;
1979
1980	/* 'interrupted' is a volatile object, fetch it only once */
1981	fatal_sig = interrupted;
1982	if (!fatal_sig)
1983		fatal_sig = SIGTERM;
1984
1985	for (i = 0; i < tcbtabsize; i++) {
1986		tcp = tcbtab[i];
1987		if (!(tcp->flags & TCB_INUSE))
1988			continue;
1989		if (debug)
1990			fprintf(stderr,
1991				"cleanup: looking at pid %u\n", tcp->pid);
1992		if (printing_tcp &&
1993		    (!outfname || followfork < 2 || printing_tcp == tcp)) {
1994			tprints(" <unfinished ...>\n");
1995			printing_tcp = NULL;
1996		}
1997		if (tcp->flags & TCB_ATTACHED)
1998			detach(tcp);
1999		else {
2000			kill(tcp->pid, SIGCONT);
2001			kill(tcp->pid, fatal_sig);
2002		}
2003	}
2004	if (cflag)
2005		call_summary(outf);
2006}
2007
2008static void
2009interrupt(int sig)
2010{
2011	interrupted = sig;
2012}
2013
2014#ifndef HAVE_STRERROR
2015
2016#if !HAVE_DECL_SYS_ERRLIST
2017extern int sys_nerr;
2018extern char *sys_errlist[];
2019#endif /* HAVE_DECL_SYS_ERRLIST */
2020
2021const char *
2022strerror(int err_no)
2023{
2024	static char buf[64];
2025
2026	if (err_no < 1 || err_no >= sys_nerr) {
2027		sprintf(buf, "Unknown error %d", err_no);
2028		return buf;
2029	}
2030	return sys_errlist[err_no];
2031}
2032
#endif /* HAVE_STRERROR */
2034
2035#ifndef HAVE_STRSIGNAL
2036
2037#if defined HAVE_SYS_SIGLIST && !defined HAVE_DECL_SYS_SIGLIST
2038extern char *sys_siglist[];
2039#endif
2040#if defined HAVE_SYS__SIGLIST && !defined HAVE_DECL__SYS_SIGLIST
2041extern char *_sys_siglist[];
2042#endif
2043
2044const char *
2045strsignal(int sig)
2046{
2047	static char buf[64];
2048
2049	if (sig < 1 || sig >= NSIG) {
2050		sprintf(buf, "Unknown signal %d", sig);
2051		return buf;
2052	}
2053#ifdef HAVE__SYS_SIGLIST
2054	return _sys_siglist[sig];
2055#else
2056	return sys_siglist[sig];
2057#endif
2058}
2059
2060#endif /* HAVE_STRSIGNAL */
2061
2062#ifdef USE_PROCFS
2063
2064static void
2065rebuild_pollv(void)
2066{
2067	int i, j;
2068
2069	free(pollv);
2070	pollv = malloc(nprocs * sizeof(pollv[0]));
2071	if (!pollv)
2072		die_out_of_memory();
2073
2074	for (i = j = 0; i < tcbtabsize; i++) {
2075		struct tcb *tcp = tcbtab[i];
2076		if (!(tcp->flags & TCB_INUSE))
2077			continue;
2078		pollv[j].fd = tcp->pfd;
2079		pollv[j].events = POLLWANT;
2080		j++;
2081	}
2082	if (j != nprocs) {
2083		error_msg_and_die("proc miscount");
2084	}
2085}
2086
2087#ifndef HAVE_POLLABLE_PROCFS
2088
2089static void
2090proc_poll_open(void)
2091{
2092	int i;
2093
2094	if (pipe(proc_poll_pipe) < 0) {
2095		perror_msg_and_die("pipe");
2096	}
2097	for (i = 0; i < 2; i++) {
2098		set_cloexec_flag(proc_poll_pipe[i]);
2099	}
2100}
2101
2102static int
2103proc_poll(struct pollfd *pollv, int nfds, int timeout)
2104{
2105	int i;
2106	int n;
2107	struct proc_pollfd pollinfo;
2108
2109	n = read(proc_poll_pipe[0], &pollinfo, sizeof(pollinfo));
2110	if (n < 0)
2111		return n;
2112	if (n != sizeof(struct proc_pollfd)) {
2113		error_msg_and_die("panic: short read: %d", n);
2114	}
2115	for (i = 0; i < nprocs; i++) {
2116		if (pollv[i].fd == pollinfo.fd)
2117			pollv[i].revents = pollinfo.revents;
2118		else
2119			pollv[i].revents = 0;
2120	}
2121	poller_pid = pollinfo.pid;
2122	return 1;
2123}
2124
/*
 * Signal handler that deliberately does nothing.
 */
static void
wakeup_handler(int sig)
{
	(void)sig;
}
2129
2130static void
2131proc_poller(int pfd)
2132{
2133	struct proc_pollfd pollinfo;
2134	struct sigaction sa;
2135	sigset_t blocked_set, empty_set;
2136	int i;
2137	int n;
2138	struct rlimit rl;
2139#ifdef FREEBSD
2140	struct procfs_status pfs;
2141#endif /* FREEBSD */
2142
2143	switch (fork()) {
2144	case -1:
2145		perror_msg_and_die("fork");
2146	case 0:
2147		break;
2148	default:
2149		return;
2150	}
2151
2152	sa.sa_handler = interactive ? SIG_DFL : SIG_IGN;
2153	sa.sa_flags = 0;
2154	sigemptyset(&sa.sa_mask);
2155	sigaction(SIGHUP, &sa, NULL);
2156	sigaction(SIGINT, &sa, NULL);
2157	sigaction(SIGQUIT, &sa, NULL);
2158	sigaction(SIGPIPE, &sa, NULL);
2159	sigaction(SIGTERM, &sa, NULL);
2160	sa.sa_handler = wakeup_handler;
2161	sigaction(SIGUSR1, &sa, NULL);
2162	sigemptyset(&blocked_set);
2163	sigaddset(&blocked_set, SIGUSR1);
2164	sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2165	sigemptyset(&empty_set);
2166
2167	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
2168		perror_msg_and_die("getrlimit(RLIMIT_NOFILE, ...)");
2169	}
2170	n = rl.rlim_cur;
2171	for (i = 0; i < n; i++) {
2172		if (i != pfd && i != proc_poll_pipe[1])
2173			close(i);
2174	}
2175
2176	pollinfo.fd = pfd;
2177	pollinfo.pid = getpid();
2178	for (;;) {
2179#ifndef FREEBSD
2180		if (ioctl(pfd, PIOCWSTOP, NULL) < 0)
2181#else
2182		if (ioctl(pfd, PIOCWSTOP, &pfs) < 0)
2183#endif
2184		{
2185			switch (errno) {
2186			case EINTR:
2187				continue;
2188			case EBADF:
2189				pollinfo.revents = POLLERR;
2190				break;
2191			case ENOENT:
2192				pollinfo.revents = POLLHUP;
2193				break;
2194			default:
2195				perror("proc_poller: PIOCWSTOP");
2196			}
2197			write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
2198			_exit(0);
2199		}
2200		pollinfo.revents = POLLWANT;
2201		write(proc_poll_pipe[1], &pollinfo, sizeof(pollinfo));
2202		sigsuspend(&empty_set);
2203	}
2204}
2205
2206#endif /* !HAVE_POLLABLE_PROCFS */
2207
2208static int
2209choose_pfd()
2210{
2211	int i, j;
2212	struct tcb *tcp;
2213
2214	static int last;
2215
2216	if (followfork < 2 &&
2217	    last < nprocs && (pollv[last].revents & POLLWANT)) {
2218		/*
2219		 * The previous process is ready to run again.  We'll
2220		 * let it do so if it is currently in a syscall.  This
2221		 * heuristic improves the readability of the trace.
2222		 */
2223		tcp = pfd2tcb(pollv[last].fd);
2224		if (tcp && exiting(tcp))
2225			return pollv[last].fd;
2226	}
2227
2228	for (i = 0; i < nprocs; i++) {
2229		/* Let competing children run round robin. */
2230		j = (i + last + 1) % nprocs;
2231		if (pollv[j].revents & (POLLHUP | POLLERR)) {
2232			tcp = pfd2tcb(pollv[j].fd);
2233			if (!tcp) {
2234				error_msg_and_die("lost proc");
2235			}
2236			droptcb(tcp);
2237			return -1;
2238		}
2239		if (pollv[j].revents & POLLWANT) {
2240			last = j;
2241			return pollv[j].fd;
2242		}
2243	}
2244	error_msg_and_die("nothing ready");
2245}
2246
2247static int
2248trace(void)
2249{
2250#ifdef POLL_HACK
2251	struct tcb *in_syscall = NULL;
2252#endif
2253	struct tcb *tcp;
2254	int pfd;
2255	int what;
2256	int ioctl_result = 0, ioctl_errno = 0;
2257	long arg;
2258
2259	for (;;) {
2260		if (interactive)
2261			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2262
2263		if (nprocs == 0)
2264			break;
2265
2266		switch (nprocs) {
2267		case 1:
2268#ifndef HAVE_POLLABLE_PROCFS
2269			if (proc_poll_pipe[0] == -1) {
2270#endif
2271				tcp = first_used_tcb();
2272				if (!tcp)
2273					continue;
2274				pfd = tcp->pfd;
2275				if (pfd == -1)
2276					continue;
2277				break;
2278#ifndef HAVE_POLLABLE_PROCFS
2279			}
2280			/* fall through ... */
2281#endif /* !HAVE_POLLABLE_PROCFS */
2282		default:
2283#ifdef HAVE_POLLABLE_PROCFS
2284#ifdef POLL_HACK
2285		        /* On some systems (e.g. UnixWare) we get too much ugly
2286			   "unfinished..." stuff when multiple proceses are in
2287			   syscalls.  Here's a nasty hack */
2288
2289			if (in_syscall) {
2290				struct pollfd pv;
2291				tcp = in_syscall;
2292				in_syscall = NULL;
2293				pv.fd = tcp->pfd;
2294				pv.events = POLLWANT;
2295				what = poll(&pv, 1, 1);
2296				if (what < 0) {
2297					if (interrupted)
2298						return 0;
2299					continue;
2300				}
2301				else if (what == 1 && pv.revents & POLLWANT) {
2302					goto FOUND;
2303				}
2304			}
2305#endif
2306
2307			if (poll(pollv, nprocs, INFTIM) < 0) {
2308				if (interrupted)
2309					return 0;
2310				continue;
2311			}
2312#else /* !HAVE_POLLABLE_PROCFS */
2313			if (proc_poll(pollv, nprocs, INFTIM) < 0) {
2314				if (interrupted)
2315					return 0;
2316				continue;
2317			}
2318#endif /* !HAVE_POLLABLE_PROCFS */
2319			pfd = choose_pfd();
2320			if (pfd == -1)
2321				continue;
2322			break;
2323		}
2324
2325		/* Look up `pfd' in our table. */
2326		tcp = pfd2tcb(pfd);
2327		if (tcp == NULL) {
2328			error_msg_and_die("unknown pfd: %u", pfd);
2329		}
2330#ifdef POLL_HACK
2331	FOUND:
2332#endif
2333		/* Get the status of the process. */
2334		if (!interrupted) {
2335#ifndef FREEBSD
2336			ioctl_result = IOCTL_WSTOP(tcp);
2337#else /* FREEBSD */
2338			/* Thanks to some scheduling mystery, the first poller
2339			   sometimes waits for the already processed end of fork
2340			   event. Doing a non blocking poll here solves the problem. */
2341			if (proc_poll_pipe[0] != -1)
2342				ioctl_result = IOCTL_STATUS(tcp);
2343			else
2344				ioctl_result = IOCTL_WSTOP(tcp);
2345#endif /* FREEBSD */
2346			ioctl_errno = errno;
2347#ifndef HAVE_POLLABLE_PROCFS
2348			if (proc_poll_pipe[0] != -1) {
2349				if (ioctl_result < 0)
2350					kill(poller_pid, SIGKILL);
2351				else
2352					kill(poller_pid, SIGUSR1);
2353			}
2354#endif /* !HAVE_POLLABLE_PROCFS */
2355		}
2356		if (interrupted)
2357			return 0;
2358
2359		if (interactive)
2360			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2361
2362		if (ioctl_result < 0) {
2363			/* Find out what happened if it failed. */
2364			switch (ioctl_errno) {
2365			case EINTR:
2366			case EBADF:
2367				continue;
2368#ifdef FREEBSD
2369			case ENOTTY:
2370#endif
2371			case ENOENT:
2372				droptcb(tcp);
2373				continue;
2374			default:
2375				perror_msg_and_die("PIOCWSTOP");
2376			}
2377		}
2378
2379#ifdef FREEBSD
2380		if ((tcp->flags & TCB_STARTUP) && (tcp->status.PR_WHY == PR_SYSEXIT)) {
2381			/* discard first event for a syscall we never entered */
2382			IOCTL(tcp->pfd, PIOCRUN, 0);
2383			continue;
2384		}
2385#endif
2386
2387		/* clear the just started flag */
2388		tcp->flags &= ~TCB_STARTUP;
2389
2390		/* set current output file */
2391		outf = tcp->outf;
2392		curcol = tcp->curcol;
2393
2394		if (cflag) {
2395			struct timeval stime;
2396#ifdef FREEBSD
2397			char buf[1024];
2398			int len;
2399
2400			len = pread(tcp->pfd_status, buf, sizeof(buf) - 1, 0);
2401			if (len > 0) {
2402				buf[len] = '\0';
2403				sscanf(buf,
2404				       "%*s %*d %*d %*d %*d %*d,%*d %*s %*d,%*d %*d,%*d %ld,%ld",
2405				       &stime.tv_sec, &stime.tv_usec);
2406			} else
2407				stime.tv_sec = stime.tv_usec = 0;
2408#else /* !FREEBSD */
2409			stime.tv_sec = tcp->status.pr_stime.tv_sec;
2410			stime.tv_usec = tcp->status.pr_stime.tv_nsec/1000;
2411#endif /* !FREEBSD */
2412			tv_sub(&tcp->dtime, &stime, &tcp->stime);
2413			tcp->stime = stime;
2414		}
2415		what = tcp->status.PR_WHAT;
2416		switch (tcp->status.PR_WHY) {
2417#ifndef FREEBSD
2418		case PR_REQUESTED:
2419			if (tcp->status.PR_FLAGS & PR_ASLEEP) {
2420				tcp->status.PR_WHY = PR_SYSENTRY;
2421				if (trace_syscall(tcp) < 0) {
2422					error_msg_and_die("syscall trouble");
2423				}
2424			}
2425			break;
2426#endif /* !FREEBSD */
2427		case PR_SYSENTRY:
2428#ifdef POLL_HACK
2429		        in_syscall = tcp;
2430#endif
2431		case PR_SYSEXIT:
2432			if (trace_syscall(tcp) < 0) {
2433				error_msg_and_die("syscall trouble");
2434			}
2435			break;
2436		case PR_SIGNALLED:
2437			if (cflag != CFLAG_ONLY_STATS
2438			    && (qual_flags[what] & QUAL_SIGNAL)) {
2439				printleader(tcp);
2440				tprintf("--- %s (%s) ---\n",
2441					signame(what), strsignal(what));
2442				printing_tcp = NULL;
2443#ifdef PR_INFO
2444				if (tcp->status.PR_INFO.si_signo == what) {
2445					printleader(tcp);
2446					tprints("    siginfo=");
2447					printsiginfo(&tcp->status.PR_INFO, 1);
2448					tprints("\n");
2449					printing_tcp = NULL;
2450				}
2451#endif
2452			}
2453			break;
2454		case PR_FAULTED:
2455			if (cflag != CFLAGS_ONLY_STATS
2456			    && (qual_flags[what] & QUAL_FAULT)) {
2457				printleader(tcp);
2458				tprintf("=== FAULT %d ===\n", what);
2459				printing_tcp = NULL;
2460			}
2461			break;
2462#ifdef FREEBSD
2463		case 0: /* handle case we polled for nothing */
2464			continue;
2465#endif
2466		default:
2467			error_msg_and_die("odd stop %d", tcp->status.PR_WHY);
2468			break;
2469		}
2470		/* Remember current print column before continuing. */
2471		tcp->curcol = curcol;
2472		arg = 0;
2473#ifndef FREEBSD
2474		if (IOCTL(tcp->pfd, PIOCRUN, &arg) < 0)
2475#else
2476		if (IOCTL(tcp->pfd, PIOCRUN, 0) < 0)
2477#endif
2478		{
2479			perror_msg_and_die("PIOCRUN");
2480		}
2481	}
2482	return 0;
2483}
2484
2485#else /* !USE_PROCFS */
2486
2487static int
2488trace(void)
2489{
2490#ifdef LINUX
2491	struct rusage ru;
2492	struct rusage *rup = cflag ? &ru : NULL;
2493# ifdef __WALL
2494	static int wait4_options = __WALL;
2495# endif
2496#endif /* LINUX */
2497
2498	while (nprocs != 0) {
2499		int pid;
2500		int wait_errno;
2501		int status, sig;
2502		int stopped;
2503		struct tcb *tcp;
2504		unsigned event;
2505
2506		if (interrupted)
2507			return 0;
2508		if (interactive)
2509			sigprocmask(SIG_SETMASK, &empty_set, NULL);
2510#ifdef LINUX
2511# ifdef __WALL
2512		pid = wait4(-1, &status, wait4_options, rup);
2513		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
2514			/* this kernel does not support __WALL */
2515			wait4_options &= ~__WALL;
2516			pid = wait4(-1, &status, wait4_options, rup);
2517		}
2518		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
2519			/* most likely a "cloned" process */
2520			pid = wait4(-1, &status, __WCLONE, rup);
2521			if (pid < 0) {
2522				perror_msg("wait4(__WCLONE) failed");
2523			}
2524		}
2525# else
2526		pid = wait4(-1, &status, 0, rup);
2527# endif /* __WALL */
2528#endif /* LINUX */
2529#ifdef SUNOS4
2530		pid = wait(&status);
2531#endif
2532		wait_errno = errno;
2533		if (interactive)
2534			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
2535
2536		if (pid < 0) {
2537			switch (wait_errno) {
2538			case EINTR:
2539				continue;
2540			case ECHILD:
2541				/*
2542				 * We would like to verify this case
2543				 * but sometimes a race in Solbourne's
2544				 * version of SunOS sometimes reports
2545				 * ECHILD before sending us SIGCHILD.
2546				 */
2547				return 0;
2548			default:
2549				errno = wait_errno;
2550				perror("strace: wait");
2551				return -1;
2552			}
2553		}
2554		if (pid == popen_pid) {
2555			if (WIFEXITED(status) || WIFSIGNALED(status))
2556				popen_pid = 0;
2557			continue;
2558		}
2559
2560		event = ((unsigned)status >> 16);
2561		if (debug) {
2562			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
2563#ifdef LINUX
2564			if (event != 0) {
2565				static const char *const event_names[] = {
2566					[PTRACE_EVENT_CLONE] = "CLONE",
2567					[PTRACE_EVENT_FORK]  = "FORK",
2568					[PTRACE_EVENT_VFORK] = "VFORK",
2569					[PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
2570					[PTRACE_EVENT_EXEC]  = "EXEC",
2571					[PTRACE_EVENT_EXIT]  = "EXIT",
2572				};
2573				const char *e;
2574				if (event < ARRAY_SIZE(event_names))
2575					e = event_names[event];
2576				else {
2577					sprintf(buf, "?? (%u)", event);
2578					e = buf;
2579				}
2580				fprintf(stderr, " PTRACE_EVENT_%s", e);
2581			}
2582#endif
2583			strcpy(buf, "???");
2584			if (WIFSIGNALED(status))
2585#ifdef WCOREDUMP
2586				sprintf(buf, "WIFSIGNALED,%ssig=%s",
2587						WCOREDUMP(status) ? "core," : "",
2588						signame(WTERMSIG(status)));
2589#else
2590				sprintf(buf, "WIFSIGNALED,sig=%s",
2591						signame(WTERMSIG(status)));
2592#endif
2593			if (WIFEXITED(status))
2594				sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
2595			if (WIFSTOPPED(status))
2596				sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
2597#ifdef WIFCONTINUED
2598			if (WIFCONTINUED(status))
2599				strcpy(buf, "WIFCONTINUED");
2600#endif
2601			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
2602		}
2603
2604		/* Look up 'pid' in our table. */
2605		tcp = pid2tcb(pid);
2606
2607#ifdef LINUX
2608		/* Under Linux, execve changes pid to thread leader's pid,
2609		 * and we see this changed pid on EVENT_EXEC and later,
2610		 * execve sysexit. Leader "disappears" without exit
2611		 * notification. Let user know that, drop leader's tcb,
2612		 * and fix up pid in execve thread's tcb.
2613		 * Effectively, execve thread's tcb replaces leader's tcb.
2614		 *
2615		 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
2616		 * on exit syscall) in multithreaded programs exactly
2617		 * in order to handle this case.
2618		 *
2619		 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
2620		 * On 2.6 and earlier, it can return garbage.
2621		 */
2622		if (event == PTRACE_EVENT_EXEC && os_release[0] >= '3') {
2623			long old_pid = 0;
2624			if (ptrace(PTRACE_GETEVENTMSG, pid, NULL, (long) &old_pid) >= 0
2625			 && old_pid > 0
2626			 && old_pid != pid
2627			) {
2628				struct tcb *execve_thread = pid2tcb(old_pid);
2629				if (tcp) {
2630					outf = tcp->outf;
2631					curcol = tcp->curcol;
2632					if (!cflag) {
2633						if (printing_tcp)
2634							tprints(" <unfinished ...>\n");
2635						printleader(tcp);
2636						tprintf("+++ superseded by execve in pid %lu +++\n", old_pid);
2637						printing_tcp = NULL;
2638						fflush(outf);
2639					}
2640					if (execve_thread) {
2641						/* swap output FILEs (needed for -ff) */
2642						tcp->outf = execve_thread->outf;
2643						execve_thread->outf = outf;
2644					}
2645					droptcb(tcp);
2646				}
2647				tcp = execve_thread;
2648				if (tcp) {
2649					tcp->pid = pid;
2650					tcp->flags |= TCB_REPRINT;
2651				}
2652			}
2653		}
2654#endif
2655
2656		if (tcp == NULL) {
2657#ifdef LINUX
2658			if (followfork) {
2659				/* This is needed to go with the CLONE_PTRACE
2660				   changes in process.c/util.c: we might see
2661				   the child's initial trap before we see the
2662				   parent return from the clone syscall.
2663				   Leave the child suspended until the parent
2664				   returns from its system call.  Only then
2665				   will we have the association of parent and
2666				   child so that we know how to do clearbpt
2667				   in the child.  */
2668				tcp = alloctcb(pid);
2669				tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
2670				if (!qflag)
2671					fprintf(stderr, "Process %d attached\n",
2672						pid);
2673			}
2674			else
2675				/* This can happen if a clone call used
2676				   CLONE_PTRACE itself.  */
2677#endif
2678			{
2679				if (WIFSTOPPED(status))
2680					ptrace(PTRACE_CONT, pid, (char *) 1, 0);
2681				error_msg_and_die("Unknown pid: %u", pid);
2682			}
2683		}
2684		/* set current output file */
2685		outf = tcp->outf;
2686		curcol = tcp->curcol;
2687#ifdef LINUX
2688		if (cflag) {
2689			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
2690			tcp->stime = ru.ru_stime;
2691		}
2692#endif
2693
2694		if (WIFSIGNALED(status)) {
2695			if (pid == strace_child)
2696				exit_code = 0x100 | WTERMSIG(status);
2697			if (cflag != CFLAG_ONLY_STATS
2698			    && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL)) {
2699				printleader(tcp);
2700#ifdef WCOREDUMP
2701				tprintf("+++ killed by %s %s+++\n",
2702					signame(WTERMSIG(status)),
2703					WCOREDUMP(status) ? "(core dumped) " : "");
2704#else
2705				tprintf("+++ killed by %s +++\n",
2706					signame(WTERMSIG(status)));
2707#endif
2708				printing_tcp = NULL;
2709			}
2710			fflush(tcp->outf);
2711			droptcb(tcp);
2712			continue;
2713		}
2714		if (WIFEXITED(status)) {
2715			if (pid == strace_child)
2716				exit_code = WEXITSTATUS(status);
2717			if (tcp == printing_tcp) {
2718				tprints(" <unfinished ...>\n");
2719				printing_tcp = NULL;
2720			}
2721			if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
2722				printleader(tcp);
2723				tprintf("+++ exited with %d +++\n", WEXITSTATUS(status));
2724				printing_tcp = NULL;
2725			}
2726			fflush(tcp->outf);
2727			droptcb(tcp);
2728			continue;
2729		}
2730		if (!WIFSTOPPED(status)) {
2731			fprintf(stderr, "PANIC: pid %u not stopped\n", pid);
2732			droptcb(tcp);
2733			continue;
2734		}
2735
2736		/* Is this the very first time we see this tracee stopped? */
2737		if (tcp->flags & TCB_STARTUP) {
2738			if (debug)
2739				fprintf(stderr, "pid %d has TCB_STARTUP, initializing it\n", tcp->pid);
2740			tcp->flags &= ~TCB_STARTUP;
2741			if (tcp->flags & TCB_BPTSET) {
2742				/*
2743				 * One example is a breakpoint inherited from
2744				 * parent through fork().
2745				 */
2746				if (clearbpt(tcp) < 0) {
2747					/* Pretty fatal */
2748					droptcb(tcp);
2749					cleanup();
2750					return -1;
2751				}
2752			}
2753#ifdef LINUX
2754			if (ptrace_setoptions) {
2755				if (debug)
2756					fprintf(stderr, "setting opts %x on pid %d\n", ptrace_setoptions, tcp->pid);
2757				if (ptrace(PTRACE_SETOPTIONS, tcp->pid, NULL, ptrace_setoptions) < 0) {
2758					if (errno != ESRCH) {
2759						/* Should never happen, really */
2760						perror_msg_and_die("PTRACE_SETOPTIONS");
2761					}
2762				}
2763			}
2764#endif
2765		}
2766
2767		sig = WSTOPSIG(status);
2768
2769		if (event != 0) {
2770			/* Ptrace event */
2771#ifdef USE_SEIZE
2772			if (event == PTRACE_EVENT_STOP || event == PTRACE_EVENT_STOP1) {
2773				/*
2774				 * PTRACE_INTERRUPT-stop or group-stop.
2775				 * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
2776				 */
2777				if (sig == SIGSTOP
2778				 || sig == SIGTSTP
2779				 || sig == SIGTTIN
2780				 || sig == SIGTTOU
2781				) {
2782					stopped = 1;
2783					goto show_stopsig;
2784				}
2785			}
2786#endif
2787			goto restart_tracee_with_sig_0;
2788		}
2789
2790		/* Is this post-attach SIGSTOP?
2791		 * Interestingly, the process may stop
2792		 * with STOPSIG equal to some other signal
2793		 * than SIGSTOP if we happend to attach
2794		 * just before the process takes a signal.
2795		 */
2796		if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
2797			if (debug)
2798				fprintf(stderr, "ignored SIGSTOP on pid %d\n", tcp->pid);
2799			tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
2800			goto restart_tracee_with_sig_0;
2801		}
2802
2803		if (sig != syscall_trap_sig) {
2804			siginfo_t si;
2805
2806			/* Nonzero (true) if tracee is stopped by signal
2807			 * (as opposed to "tracee received signal").
2808			 */
2809			stopped = (ptrace(PTRACE_GETSIGINFO, pid, 0, (long) &si) < 0);
2810#ifdef USE_SEIZE
2811 show_stopsig:
2812#endif
2813			if (cflag != CFLAG_ONLY_STATS
2814			    && (qual_flags[sig] & QUAL_SIGNAL)) {
2815#if defined(PT_CR_IPSR) && defined(PT_CR_IIP)
2816				long pc = 0;
2817				long psr = 0;
2818
2819				upeek(tcp, PT_CR_IPSR, &psr);
2820				upeek(tcp, PT_CR_IIP, &pc);
2821
2822# define PSR_RI	41
2823				pc += (psr >> PSR_RI) & 0x3;
2824# define PC_FORMAT_STR	" @ %lx"
2825# define PC_FORMAT_ARG	, pc
2826#else
2827# define PC_FORMAT_STR	""
2828# define PC_FORMAT_ARG	/* nothing */
2829#endif
2830				printleader(tcp);
2831				if (!stopped) {
2832					tprints("--- ");
2833					printsiginfo(&si, verbose(tcp));
2834					tprintf(" (%s)" PC_FORMAT_STR " ---\n",
2835						strsignal(sig)
2836						PC_FORMAT_ARG);
2837				} else
2838					tprintf("--- %s by %s" PC_FORMAT_STR " ---\n",
2839						strsignal(sig),
2840						signame(sig)
2841						PC_FORMAT_ARG);
2842				printing_tcp = NULL;
2843				fflush(tcp->outf);
2844			}
2845
2846			if (!stopped)
2847				/* It's signal-delivery-stop. Inject the signal */
2848				goto restart_tracee;
2849
2850			/* It's group-stop */
2851#ifdef USE_SEIZE
2852			if (use_seize) {
2853				/*
2854				 * This ends ptrace-stop, but does *not* end group-stop.
2855				 * This makes stopping signals work properly on straced process
2856				 * (that is, process really stops. It used to continue to run).
2857				 */
2858				if (ptrace_restart(PTRACE_LISTEN, tcp, 0) < 0) {
2859					cleanup();
2860					return -1;
2861				}
2862				continue;
2863			}
2864			/* We don't have PTRACE_LISTEN support... */
2865#endif
2866			goto restart_tracee;
2867		}
2868
2869		/* We handled quick cases, we are permitted to interrupt now. */
2870		if (interrupted)
2871			return 0;
2872
2873		/* This should be syscall entry or exit.
2874		 * (Or it still can be that pesky post-execve SIGTRAP!)
2875		 * Handle it.
2876		 */
2877		if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) {
2878			/* ptrace() failed in trace_syscall() with ESRCH.
2879			 * Likely a result of process disappearing mid-flight.
2880			 * Observed case: exit_group() terminating
2881			 * all processes in thread group.
2882			 */
2883			if (tcp->flags & TCB_ATTACHED) {
2884				if (printing_tcp) {
2885					/* Do we have dangling line "syscall(param, param"?
2886					 * Finish the line then.
2887					 */
2888					printing_tcp->flags |= TCB_REPRINT;
2889					tprints(" <unfinished ...>\n");
2890					printing_tcp = NULL;
2891					fflush(tcp->outf);
2892				}
2893				/* We assume that ptrace error was caused by process death.
2894				 * We used to detach(tcp) here, but since we no longer
2895				 * implement "detach before death" policy/hack,
2896				 * we can let this process to report its death to us
2897				 * normally, via WIFEXITED or WIFSIGNALED wait status.
2898				 */
2899			} else {
2900				/* It's our real child (and we also trace it) */
2901				/* my_tkill(pid, SIGKILL); - why? */
2902				/* droptcb(tcp); - why? */
2903			}
2904			continue;
2905		}
2906 restart_tracee_with_sig_0:
2907		sig = 0;
2908 restart_tracee:
2909		/* Remember current print column before continuing. */
2910		tcp->curcol = curcol;
2911		if (ptrace_restart(PTRACE_SYSCALL, tcp, sig) < 0) {
2912			cleanup();
2913			return -1;
2914		}
2915	}
2916	return 0;
2917}
2918
2919#endif /* !USE_PROCFS */
2920
2921void
2922tprintf(const char *fmt, ...)
2923{
2924	va_list args;
2925
2926	va_start(args, fmt);
2927	if (outf) {
2928		int n = vfprintf(outf, fmt, args);
2929		if (n < 0) {
2930			if (outf != stderr)
2931				perror(outfname == NULL
2932				       ? "<writing to pipe>" : outfname);
2933		} else
2934			curcol += n;
2935	}
2936	va_end(args);
2937}
2938
2939void
2940tprints(const char *str)
2941{
2942	if (outf) {
2943		int n = fputs(str, outf);
2944		if (n >= 0) {
2945			curcol += strlen(str);
2946			return;
2947		}
2948		if (outf != stderr)
2949			perror(outfname == NULL
2950			       ? "<writing to pipe>" : outfname);
2951	}
2952}
2953
2954void
2955printleader(struct tcb *tcp)
2956{
2957	if (printing_tcp) {
2958		if (printing_tcp->ptrace_errno) {
2959			if (printing_tcp->flags & TCB_INSYSCALL) {
2960				tprints(" <unavailable>) ");
2961				tabto();
2962			}
2963			tprints("= ? <unavailable>\n");
2964			printing_tcp->ptrace_errno = 0;
2965		} else if (!outfname || followfork < 2 || printing_tcp == tcp) {
2966			printing_tcp->flags |= TCB_REPRINT;
2967			tprints(" <unfinished ...>\n");
2968		}
2969	}
2970
2971	printing_tcp = tcp;
2972	curcol = 0;
2973	if ((followfork == 1 || pflag_seen > 1) && outfname)
2974		tprintf("%-5d ", tcp->pid);
2975	else if (nprocs > 1 && !outfname)
2976		tprintf("[pid %5u] ", tcp->pid);
2977	if (tflag) {
2978		char str[sizeof("HH:MM:SS")];
2979		struct timeval tv, dtv;
2980		static struct timeval otv;
2981
2982		gettimeofday(&tv, NULL);
2983		if (rflag) {
2984			if (otv.tv_sec == 0)
2985				otv = tv;
2986			tv_sub(&dtv, &tv, &otv);
2987			tprintf("%6ld.%06ld ",
2988				(long) dtv.tv_sec, (long) dtv.tv_usec);
2989			otv = tv;
2990		}
2991		else if (tflag > 2) {
2992			tprintf("%ld.%06ld ",
2993				(long) tv.tv_sec, (long) tv.tv_usec);
2994		}
2995		else {
2996			time_t local = tv.tv_sec;
2997			strftime(str, sizeof(str), "%T", localtime(&local));
2998			if (tflag > 1)
2999				tprintf("%s.%06ld ", str, (long) tv.tv_usec);
3000			else
3001				tprintf("%s ", str);
3002		}
3003	}
3004	if (iflag)
3005		printcall(tcp);
3006}
3007
3008void
3009tabto(void)
3010{
3011	if (curcol < acolumn)
3012		tprints(acolumn_spaces + curcol);
3013}
3014
3015#ifdef HAVE_MP_PROCFS
3016
/*
 * Issue a control request by writing it to `fd' with a single writev().
 *
 * The command word `cmd' is always written first; when `arg' is
 * non-NULL, `size' bytes starting at `arg' follow it in the same write.
 *
 * Returns whatever writev() returns.
 */
int
mp_ioctl(int fd, int cmd, void *arg, int size)
{
	struct iovec vec[2];
	int count = 0;

	/* First segment: the command word itself */
	vec[count].iov_base = &cmd;
	vec[count].iov_len = sizeof(cmd);
	count++;

	/* Optional second segment: the caller's argument block */
	if (arg != NULL) {
		vec[count].iov_base = arg;
		vec[count].iov_len = size;
		count++;
	}

	return writev(fd, vec, count);
}
3033
3034#endif
3035