trace.c revision d7e4ca82e1cf20bb2605befb1da74dd1688c706e
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2007,2011,2012,2013 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2010 Joe Damato
5 * Copyright (C) 1998,2002,2003,2004,2008,2009 Juan Cespedes
6 * Copyright (C) 2006 Ian Wienand
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
21 * 02110-1301 USA
22 */
23
24#include "config.h"
25
26#include <asm/unistd.h>
27#include <sys/types.h>
28#include <sys/wait.h>
29#include <assert.h>
30#include <errno.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35
36#ifdef HAVE_LIBSELINUX
37# include <selinux/selinux.h>
38#endif
39
40#include "linux-gnu/trace.h"
41#include "linux-gnu/trace-defs.h"
42#include "backend.h"
43#include "breakpoint.h"
44#include "debug.h"
45#include "events.h"
46#include "options.h"
47#include "proc.h"
48#include "ptrace.h"
49#include "type.h"
50
/* Print, at most once per run, a hint that the SELinux boolean
 * 'deny_ptrace' is active.  PID is not used.  Without
 * HAVE_LIBSELINUX this function does nothing.  This was adapted
 * from GDB.  */
void
trace_fail_warning(pid_t pid)
{
#ifdef HAVE_LIBSELINUX
	static int already_checked = 0;

	if (!already_checked) {
		already_checked = 1;

		/* -1 is returned for errors, 0 if it has no effect, 1
		 * if PTRACE_ATTACH is forbidden.  */
		int deny = security_get_boolean_active("deny_ptrace");
		if (deny == 1)
			fprintf(stderr,
"The SELinux boolean 'deny_ptrace' is enabled, which may prevent ltrace from\n"
"tracing other processes.  You can disable this process attach protection by\n"
"issuing 'setsebool deny_ptrace=0' in the superuser context.\n");
	}
#endif /* HAVE_LIBSELINUX */
}
70
71void
72trace_me(void)
73{
74	debug(DEBUG_PROCESS, "trace_me: pid=%d", getpid());
75	if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
76		perror("PTRACE_TRACEME");
77		trace_fail_warning(getpid());
78		exit(1);
79	}
80}
81
/* There's a (hopefully) brief period of time after the child process
 * forks when we can't trace it yet.  Here we wait for kernel to
 * prepare the process.  Returns 0 when PID was waited for
 * successfully, -1 (after reporting the error) otherwise.  */
int
wait_for_proc(pid_t pid)
{
	/* man ptrace: PTRACE_ATTACH attaches to the process specified
	   in pid.  The child is sent a SIGSTOP, but will not
	   necessarily have stopped by the completion of this call;
	   use wait() to wait for the child to stop. */
	pid_t reaped = waitpid(pid, NULL, __WALL);
	if (reaped == pid)
		return 0;

	perror ("trace_pid: waitpid");
	return -1;
}
99
100int
101trace_pid(pid_t pid)
102{
103	debug(DEBUG_PROCESS, "trace_pid: pid=%d", pid);
104	/* This shouldn't emit error messages, as there are legitimate
105	 * reasons that the PID can't be attached: like it may have
106	 * already ended.  */
107	if (ptrace(PTRACE_ATTACH, pid, 0, 0) < 0)
108		return -1;
109
110	return wait_for_proc(pid);
111}
112
113void
114trace_set_options(struct process *proc)
115{
116	if (proc->tracesysgood & 0x80)
117		return;
118
119	pid_t pid = proc->pid;
120	debug(DEBUG_PROCESS, "trace_set_options: pid=%d", pid);
121
122	long options = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK |
123		PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE |
124		PTRACE_O_TRACEEXEC;
125	if (ptrace(PTRACE_SETOPTIONS, pid, 0, (void *)options) < 0 &&
126	    ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)options) < 0) {
127		perror("PTRACE_SETOPTIONS");
128		return;
129	}
130	proc->tracesysgood |= 0x80;
131}
132
133void
134untrace_pid(pid_t pid) {
135	debug(DEBUG_PROCESS, "untrace_pid: pid=%d", pid);
136	ptrace(PTRACE_DETACH, pid, 0, 0);
137}
138
139void
140continue_after_signal(pid_t pid, int signum)
141{
142	debug(DEBUG_PROCESS, "continue_after_signal: pid=%d, signum=%d",
143	      pid, signum);
144	ptrace(PTRACE_SYSCALL, pid, 0, (void *)(uintptr_t)signum);
145}
146
147static enum ecb_status
148event_for_pid(Event *event, void *data)
149{
150	if (event->proc != NULL && event->proc->pid == (pid_t)(uintptr_t)data)
151		return ECB_YIELD;
152	return ECB_CONT;
153}
154
155static int
156have_events_for(pid_t pid)
157{
158	return each_qd_event(event_for_pid, (void *)(uintptr_t)pid) != NULL;
159}
160
161void
162continue_process(pid_t pid)
163{
164	debug(DEBUG_PROCESS, "continue_process: pid=%d", pid);
165
166	/* Only really continue the process if there are no events in
167	   the queue for this process.  Otherwise just wait for the
168	   other events to arrive.  */
169	if (!have_events_for(pid))
170		/* We always trace syscalls to control fork(),
171		 * clone(), execve()... */
172		ptrace(PTRACE_SYSCALL, pid, 0, 0);
173	else
174		debug(DEBUG_PROCESS,
175		      "putting off the continue, events in que.");
176}
177
178static struct pid_task *
179get_task_info(struct pid_set *pids, pid_t pid)
180{
181	assert(pid != 0);
182	size_t i;
183	for (i = 0; i < pids->count; ++i)
184		if (pids->tasks[i].pid == pid)
185			return &pids->tasks[i];
186
187	return NULL;
188}
189
190static struct pid_task *
191add_task_info(struct pid_set *pids, pid_t pid)
192{
193	if (pids->count == pids->alloc) {
194		size_t ns = (2 * pids->alloc) ?: 4;
195		struct pid_task *n = realloc(pids->tasks,
196					     sizeof(*pids->tasks) * ns);
197		if (n == NULL)
198			return NULL;
199		pids->tasks = n;
200		pids->alloc = ns;
201	}
202	struct pid_task * task_info = &pids->tasks[pids->count++];
203	memset(task_info, 0, sizeof(*task_info));
204	task_info->pid = pid;
205	return task_info;
206}
207
208static enum callback_status
209task_stopped(struct process *task, void *data)
210{
211	enum process_status st = process_status(task->pid);
212	if (data != NULL)
213		*(enum process_status *)data = st;
214
215	/* If the task is already stopped, don't worry about it.
216	 * Likewise if it managed to become a zombie or terminate in
217	 * the meantime.  This can happen when the whole thread group
218	 * is terminating.  */
219	switch (st) {
220	case PS_INVALID:
221	case PS_TRACING_STOP:
222	case PS_ZOMBIE:
223		return CBS_CONT;
224	case PS_SLEEPING:
225	case PS_STOP:
226	case PS_OTHER:
227		return CBS_STOP;
228	}
229
230	abort ();
231}
232
233/* Task is blocked if it's stopped, or if it's a vfork parent.  */
234static enum callback_status
235task_blocked(struct process *task, void *data)
236{
237	struct pid_set *pids = data;
238	struct pid_task *task_info = get_task_info(pids, task->pid);
239	if (task_info != NULL
240	    && task_info->vforked)
241		return CBS_CONT;
242
243	return task_stopped(task, NULL);
244}
245
246static Event *process_vfork_on_event(struct event_handler *super, Event *event);
247
248static enum callback_status
249task_vforked(struct process *task, void *data)
250{
251	if (task->event_handler != NULL
252	    && task->event_handler->on_event == &process_vfork_on_event)
253		return CBS_STOP;
254	return CBS_CONT;
255}
256
257static int
258is_vfork_parent(struct process *task)
259{
260	return each_task(task->leader, NULL, &task_vforked, NULL) != NULL;
261}
262
263static enum callback_status
264send_sigstop(struct process *task, void *data)
265{
266	struct process *leader = task->leader;
267	struct pid_set *pids = data;
268
269	/* Look for pre-existing task record, or add new.  */
270	struct pid_task *task_info = get_task_info(pids, task->pid);
271	if (task_info == NULL)
272		task_info = add_task_info(pids, task->pid);
273	if (task_info == NULL) {
274		perror("send_sigstop: add_task_info");
275		destroy_event_handler(leader);
276		/* Signal failure upwards.  */
277		return CBS_STOP;
278	}
279
280	/* This task still has not been attached to.  It should be
281	   stopped by the kernel.  */
282	if (task->state == STATE_BEING_CREATED)
283		return CBS_CONT;
284
285	/* Don't bother sending SIGSTOP if we are already stopped, or
286	 * if we sent the SIGSTOP already, which happens when we are
287	 * handling "onexit" and inherited the handler from breakpoint
288	 * re-enablement.  */
289	enum process_status st;
290	if (task_stopped(task, &st) == CBS_CONT)
291		return CBS_CONT;
292	if (task_info->sigstopped) {
293		if (!task_info->delivered)
294			return CBS_CONT;
295		task_info->delivered = 0;
296	}
297
298	/* Also don't attempt to stop the process if it's a parent of
299	 * vforked process.  We set up event handler specially to hint
300	 * us.  In that case parent is in D state, which we use to
301	 * weed out unnecessary looping.  */
302	if (st == PS_SLEEPING
303	    && is_vfork_parent(task)) {
304		task_info->vforked = 1;
305		return CBS_CONT;
306	}
307
308	if (task_kill(task->pid, SIGSTOP) >= 0) {
309		debug(DEBUG_PROCESS, "send SIGSTOP to %d", task->pid);
310		task_info->sigstopped = 1;
311	} else
312		fprintf(stderr,
313			"Warning: couldn't send SIGSTOP to %d\n", task->pid);
314
315	return CBS_CONT;
316}
317
318/* On certain kernels, detaching right after a singlestep causes the
319   tracee to be killed with a SIGTRAP (that even though the singlestep
320   was properly caught by waitpid.  The ugly workaround is to put a
321   breakpoint where IP points and let the process continue.  After
322   this the breakpoint can be retracted and the process detached.  */
323static void
324ugly_workaround(struct process *proc)
325{
326	arch_addr_t ip = get_instruction_pointer(proc);
327	struct breakpoint **found = DICT_FIND(proc->leader->breakpoints, &ip,
328					      struct breakpoint *);
329	if (found != NULL)
330		enable_breakpoint(proc, *found);
331	else
332		insert_breakpoint(proc, ip, NULL);
333	ptrace(PTRACE_CONT, proc->pid, 0, 0);
334}
335
336static void
337process_stopping_done(struct process_stopping_handler *self,
338		      struct process *leader)
339{
340	debug(DEBUG_PROCESS, "process stopping done %d",
341	      self->task_enabling_breakpoint->pid);
342
343	if (!self->exiting) {
344		size_t i;
345		for (i = 0; i < self->pids.count; ++i)
346			if (self->pids.tasks[i].pid != 0
347			    && (self->pids.tasks[i].delivered
348				|| self->pids.tasks[i].sysret))
349				continue_process(self->pids.tasks[i].pid);
350		continue_process(self->task_enabling_breakpoint->pid);
351	}
352
353	if (self->exiting) {
354	ugly_workaround:
355		self->state = PSH_UGLY_WORKAROUND;
356		ugly_workaround(self->task_enabling_breakpoint);
357	} else {
358		switch ((self->ugly_workaround_p)(self)) {
359		case CBS_FAIL:
360			/* xxx handle me */
361		case CBS_STOP:
362			break;
363		case CBS_CONT:
364			goto ugly_workaround;
365		}
366		destroy_event_handler(leader);
367	}
368}
369
370/* Before we detach, we need to make sure that task's IP is on the
371 * edge of an instruction.  So for tasks that have a breakpoint event
372 * in the queue, we adjust the instruction pointer, just like
373 * continue_after_breakpoint does.  */
374static enum ecb_status
375undo_breakpoint(Event *event, void *data)
376{
377	if (event != NULL
378	    && event->proc->leader == data
379	    && event->type == EVENT_BREAKPOINT)
380		set_instruction_pointer(event->proc, event->e_un.brk_addr);
381	return ECB_CONT;
382}
383
384static enum callback_status
385untrace_task(struct process *task, void *data)
386{
387	if (task != data)
388		untrace_pid(task->pid);
389	return CBS_CONT;
390}
391
392static enum callback_status
393remove_task(struct process *task, void *data)
394{
395	/* Don't untrace leader just yet.  */
396	if (task != data)
397		remove_process(task);
398	return CBS_CONT;
399}
400
401static enum callback_status
402retract_breakpoint_cb(struct process *proc, struct breakpoint *bp, void *data)
403{
404	breakpoint_on_retract(bp, proc);
405	return CBS_CONT;
406}
407
408static void
409detach_process(struct process *leader)
410{
411	each_qd_event(&undo_breakpoint, leader);
412	disable_all_breakpoints(leader);
413	proc_each_breakpoint(leader, NULL, retract_breakpoint_cb, NULL);
414
415	/* Now untrace the process, if it was attached to by -p.  */
416	struct opt_p_t *it;
417	for (it = opt_p; it != NULL; it = it->next) {
418		struct process *proc = pid2proc(it->pid);
419		if (proc == NULL)
420			continue;
421		if (proc->leader == leader) {
422			each_task(leader, NULL, &untrace_task, NULL);
423			break;
424		}
425	}
426	each_task(leader, NULL, &remove_task, leader);
427	destroy_event_handler(leader);
428	remove_task(leader, NULL);
429}
430
431static void
432handle_stopping_event(struct pid_task *task_info, Event **eventp)
433{
434	/* Mark all events, so that we know whom to SIGCONT later.  */
435	if (task_info != NULL)
436		task_info->got_event = 1;
437
438	Event *event = *eventp;
439
440	/* In every state, sink SIGSTOP events for tasks that it was
441	 * sent to.  */
442	if (task_info != NULL
443	    && event->type == EVENT_SIGNAL
444	    && event->e_un.signum == SIGSTOP) {
445		debug(DEBUG_PROCESS, "SIGSTOP delivered to %d", task_info->pid);
446		if (task_info->sigstopped
447		    && !task_info->delivered) {
448			task_info->delivered = 1;
449			*eventp = NULL; // sink the event
450		} else
451			fprintf(stderr, "suspicious: %d got SIGSTOP, but "
452				"sigstopped=%d and delivered=%d\n",
453				task_info->pid, task_info->sigstopped,
454				task_info->delivered);
455	}
456}
457
458/* Some SIGSTOPs may have not been delivered to their respective tasks
459 * yet.  They are still in the queue.  If we have seen an event for
460 * that process, continue it, so that the SIGSTOP can be delivered and
461 * caught by ltrace.  We don't mind that the process is after
462 * breakpoint (and therefore potentially doesn't have aligned IP),
463 * because the signal will be delivered without the process actually
464 * starting.  */
465static void
466continue_for_sigstop_delivery(struct pid_set *pids)
467{
468	size_t i;
469	for (i = 0; i < pids->count; ++i) {
470		if (pids->tasks[i].pid != 0
471		    && pids->tasks[i].sigstopped
472		    && !pids->tasks[i].delivered
473		    && pids->tasks[i].got_event) {
474			debug(DEBUG_PROCESS, "continue %d for SIGSTOP delivery",
475			      pids->tasks[i].pid);
476			ptrace(PTRACE_SYSCALL, pids->tasks[i].pid, 0, 0);
477		}
478	}
479}
480
481static int
482event_exit_p(Event *event)
483{
484	return event != NULL && (event->type == EVENT_EXIT
485				 || event->type == EVENT_EXIT_SIGNAL);
486}
487
488static int
489event_exit_or_none_p(Event *event)
490{
491	return event == NULL || event_exit_p(event)
492		|| event->type == EVENT_NONE;
493}
494
495static int
496await_sigstop_delivery(struct pid_set *pids, struct pid_task *task_info,
497		       Event *event)
498{
499	/* If we still didn't get our SIGSTOP, continue the process
500	 * and carry on.  */
501	if (event != NULL && !event_exit_or_none_p(event)
502	    && task_info != NULL && task_info->sigstopped) {
503		debug(DEBUG_PROCESS, "continue %d for SIGSTOP delivery",
504		      task_info->pid);
505		/* We should get the signal the first thing
506		 * after this, so it should be OK to continue
507		 * even if we are over a breakpoint.  */
508		ptrace(PTRACE_SYSCALL, task_info->pid, 0, 0);
509
510	} else {
511		/* If all SIGSTOPs were delivered, uninstall the
512		 * handler and continue everyone.  */
513		/* XXX I suspect that we should check tasks that are
514		 * still around.  Is things are now, there should be a
515		 * race between waiting for everyone to stop and one
516		 * of the tasks exiting.  */
517		int all_clear = 1;
518		size_t i;
519		for (i = 0; i < pids->count; ++i)
520			if (pids->tasks[i].pid != 0
521			    && pids->tasks[i].sigstopped
522			    && !pids->tasks[i].delivered) {
523				all_clear = 0;
524				break;
525			}
526		return all_clear;
527	}
528
529	return 0;
530}
531
532static int
533all_stops_accountable(struct pid_set *pids)
534{
535	size_t i;
536	for (i = 0; i < pids->count; ++i)
537		if (pids->tasks[i].pid != 0
538		    && !pids->tasks[i].got_event
539		    && !have_events_for(pids->tasks[i].pid))
540			return 0;
541	return 1;
542}
543
544#ifndef ARCH_HAVE_SW_SINGLESTEP
545enum sw_singlestep_status
546arch_sw_singlestep(struct process *proc, struct breakpoint *bp,
547		   int (*add_cb)(arch_addr_t, struct sw_singlestep_data *),
548		   struct sw_singlestep_data *data)
549{
550	return SWS_HW;
551}
552#endif
553
554static Event *process_stopping_on_event(struct event_handler *super,
555					Event *event);
556
557static void
558remove_sw_breakpoints(struct process *proc)
559{
560	struct process_stopping_handler *self
561		= (void *)proc->leader->event_handler;
562	assert(self != NULL);
563	assert(self->super.on_event == process_stopping_on_event);
564
565	int ct = sizeof(self->sws_bp_addrs) / sizeof(*self->sws_bp_addrs);
566	int i;
567	for (i = 0; i < ct; ++i)
568		if (self->sws_bp_addrs[i] != 0) {
569			delete_breakpoint(proc, self->sws_bp_addrs[i]);
570			self->sws_bp_addrs[i] = 0;
571		}
572}
573
/* on_hit callback of the software-singlestep breakpoints: once one
 * of them is hit, all of them are removed.  BP is not used.  */
static void
sw_singlestep_bp_on_hit(struct breakpoint *bp, struct process *proc)
{
	(void)bp;
	remove_sw_breakpoints(proc);
}
579
/* Context that singlestep() hands through arch_sw_singlestep to the
 * sw_singlestep_add_bp callback.  */
struct sw_singlestep_data {
	/* The stopping handler whose sws_bp_addrs slots record the
	 * addresses of the temporary breakpoints.  */
	struct process_stopping_handler *self;
};
583
584static int
585sw_singlestep_add_bp(arch_addr_t addr, struct sw_singlestep_data *data)
586{
587	struct process_stopping_handler *self = data->self;
588	struct process *proc = self->task_enabling_breakpoint;
589
590	int ct = sizeof(self->sws_bp_addrs)
591		/ sizeof(*self->sws_bp_addrs);
592	int i;
593	for (i = 0; i < ct; ++i)
594		if (self->sws_bp_addrs[i] == 0) {
595			self->sws_bp_addrs[i] = addr;
596			static struct bp_callbacks cbs = {
597				.on_hit = sw_singlestep_bp_on_hit,
598			};
599			struct breakpoint *bp
600				= insert_breakpoint(proc, addr, NULL);
601			breakpoint_set_callbacks(bp, &cbs);
602			return 0;
603		}
604
605	assert(!"Too many sw singlestep breakpoints!");
606	abort();
607}
608
609static int
610singlestep(struct process_stopping_handler *self)
611{
612	struct process *proc = self->task_enabling_breakpoint;
613
614	struct sw_singlestep_data data = { self };
615	switch (arch_sw_singlestep(self->task_enabling_breakpoint,
616				   self->breakpoint_being_enabled,
617				   &sw_singlestep_add_bp, &data)) {
618	case SWS_HW:
619		/* Otherwise do the default action: singlestep.  */
620		debug(1, "PTRACE_SINGLESTEP");
621		if (ptrace(PTRACE_SINGLESTEP, proc->pid, 0, 0)) {
622			perror("PTRACE_SINGLESTEP");
623			return -1;
624		}
625		return 0;
626
627	case SWS_OK:
628		return 0;
629
630	case SWS_FAIL:
631		return -1;
632	}
633	abort();
634}
635
636static void
637post_singlestep(struct process_stopping_handler *self,
638		struct Event **eventp)
639{
640	continue_for_sigstop_delivery(&self->pids);
641
642	if (*eventp != NULL && (*eventp)->type == EVENT_BREAKPOINT)
643		*eventp = NULL; // handled
644
645	struct process *proc = self->task_enabling_breakpoint;
646
647	remove_sw_breakpoints(proc);
648	self->breakpoint_being_enabled = NULL;
649}
650
651static void
652singlestep_error(struct process_stopping_handler *self)
653{
654	struct process *teb = self->task_enabling_breakpoint;
655	struct breakpoint *sbp = self->breakpoint_being_enabled;
656	fprintf(stderr, "%d couldn't continue when handling %s (%p) at %p\n",
657		teb->pid, breakpoint_name(sbp),	sbp->addr,
658		get_instruction_pointer(teb));
659	delete_breakpoint(teb->leader, sbp->addr);
660}
661
662static void
663pt_continue(struct process_stopping_handler *self)
664{
665	struct process *teb = self->task_enabling_breakpoint;
666	debug(1, "PTRACE_CONT");
667	ptrace(PTRACE_CONT, teb->pid, 0, 0);
668}
669
/* Singlestep the task enabling a breakpoint, reporting the failure
 * if that doesn't work.  */
static void
pt_singlestep(struct process_stopping_handler *self)
{
	int rc = singlestep(self);

	if (rc < 0)
		singlestep_error(self);
}
676
677static void
678disable_and(struct process_stopping_handler *self,
679	    void (*do_this)(struct process_stopping_handler *self))
680{
681	struct process *teb = self->task_enabling_breakpoint;
682	debug(DEBUG_PROCESS, "all stopped, now singlestep/cont %d", teb->pid);
683	if (self->breakpoint_being_enabled->enabled)
684		disable_breakpoint(teb, self->breakpoint_being_enabled);
685	(do_this)(self);
686	self->state = PSH_SINGLESTEP;
687}
688
689void
690linux_ptrace_disable_and_singlestep(struct process_stopping_handler *self)
691{
692	disable_and(self, &pt_singlestep);
693}
694
695void
696linux_ptrace_disable_and_continue(struct process_stopping_handler *self)
697{
698	disable_and(self, &pt_continue);
699}
700
701/* This event handler is installed when we are in the process of
702 * stopping the whole thread group to do the pointer re-enablement for
703 * one of the threads.  We pump all events to the queue for later
704 * processing while we wait for all the threads to stop.  When this
705 * happens, we let the re-enablement thread to PTRACE_SINGLESTEP,
706 * re-enable, and continue everyone.  */
707static Event *
708process_stopping_on_event(struct event_handler *super, Event *event)
709{
710	struct process_stopping_handler *self = (void *)super;
711	struct process *task = event->proc;
712	struct process *leader = task->leader;
713	struct process *teb = self->task_enabling_breakpoint;
714
715	debug(DEBUG_PROCESS,
716	      "process_stopping_on_event: pid %d; event type %d; state %d",
717	      task->pid, event->type, self->state);
718
719	struct pid_task *task_info = get_task_info(&self->pids, task->pid);
720	if (task_info == NULL)
721		fprintf(stderr, "new task??? %d\n", task->pid);
722	handle_stopping_event(task_info, &event);
723
724	int state = self->state;
725	int event_to_queue = !event_exit_or_none_p(event);
726
727	/* Deactivate the entry if the task exits.  */
728	if (event_exit_p(event) && task_info != NULL)
729		task_info->pid = 0;
730
731	/* Always handle sysrets.  Whether sysret occurred and what
732	 * sys it rets from may need to be determined based on process
733	 * stack, so we need to keep that in sync with reality.  Note
734	 * that we don't continue the process after the sysret is
735	 * handled.  See continue_after_syscall.  */
736	if (event != NULL && event->type == EVENT_SYSRET) {
737		debug(1, "%d LT_EV_SYSRET", event->proc->pid);
738		event_to_queue = 0;
739		task_info->sysret = 1;
740	}
741
742	switch (state) {
743	case PSH_STOPPING:
744		/* If everyone is stopped, singlestep.  */
745		if (each_task(leader, NULL, &task_blocked,
746			      &self->pids) == NULL) {
747			(self->on_all_stopped)(self);
748			state = self->state;
749		}
750		break;
751
752	case PSH_SINGLESTEP:
753		/* In singlestep state, breakpoint signifies that we
754		 * have now stepped, and can re-enable the breakpoint.  */
755		if (event != NULL && task == teb) {
756
757			/* If this was caused by a real breakpoint, as
758			 * opposed to a singlestep, assume that it's
759			 * an artificial breakpoint installed for some
760			 * reason for the re-enablement.  In that case
761			 * handle it.  */
762			if (event->type == EVENT_BREAKPOINT) {
763				arch_addr_t ip
764					= get_instruction_pointer(task);
765				struct breakpoint *other
766					= address2bpstruct(leader, ip);
767				if (other != NULL)
768					breakpoint_on_hit(other, task);
769			}
770
771			/* If we got SIGNAL instead of BREAKPOINT,
772			 * then this is not singlestep at all.  */
773			if (event->type == EVENT_SIGNAL) {
774			do_singlestep:
775				if (singlestep(self) < 0) {
776					singlestep_error(self);
777					post_singlestep(self, &event);
778					goto psh_sinking;
779				}
780				break;
781			} else {
782				switch ((self->keep_stepping_p)(self)) {
783				case CBS_FAIL:
784					/* XXX handle me */
785				case CBS_STOP:
786					break;
787				case CBS_CONT:
788					/* Sink singlestep event.  */
789					if (event->type == EVENT_BREAKPOINT)
790						event = NULL;
791					goto do_singlestep;
792				}
793			}
794
795			/* Re-enable the breakpoint that we are
796			 * stepping over.  */
797			struct breakpoint *sbp = self->breakpoint_being_enabled;
798			if (sbp->enabled)
799				enable_breakpoint(teb, sbp);
800
801			post_singlestep(self, &event);
802			goto psh_sinking;
803		}
804		break;
805
806	psh_sinking:
807		state = self->state = PSH_SINKING;
808		/* Fall through.  */
809	case PSH_SINKING:
810		if (await_sigstop_delivery(&self->pids, task_info, event))
811			process_stopping_done(self, leader);
812		break;
813
814	case PSH_UGLY_WORKAROUND:
815		if (event == NULL)
816			break;
817		if (event->type == EVENT_BREAKPOINT) {
818			undo_breakpoint(event, leader);
819			if (task == teb)
820				self->task_enabling_breakpoint = NULL;
821		}
822		if (self->task_enabling_breakpoint == NULL
823		    && all_stops_accountable(&self->pids)) {
824			undo_breakpoint(event, leader);
825			detach_process(leader);
826			event = NULL; // handled
827		}
828	}
829
830	if (event != NULL && event_to_queue) {
831		enque_event(event);
832		event = NULL; // sink the event
833	}
834
835	return event;
836}
837
838static void
839process_stopping_destroy(struct event_handler *super)
840{
841	struct process_stopping_handler *self = (void *)super;
842	free(self->pids.tasks);
843}
844
845static enum callback_status
846no(struct process_stopping_handler *self)
847{
848	return CBS_STOP;
849}
850
851int
852process_install_stopping_handler(struct process *proc, struct breakpoint *sbp,
853				 void (*as)(struct process_stopping_handler *),
854				 enum callback_status (*ks)
855					 (struct process_stopping_handler *),
856				 enum callback_status (*uw)
857					(struct process_stopping_handler *))
858{
859	debug(DEBUG_FUNCTION,
860	      "process_install_stopping_handler: pid=%d", proc->pid);
861
862	struct process_stopping_handler *handler = calloc(sizeof(*handler), 1);
863	if (handler == NULL)
864		return -1;
865
866	if (as == NULL)
867		as = &linux_ptrace_disable_and_singlestep;
868	if (ks == NULL)
869		ks = &no;
870	if (uw == NULL)
871		uw = &no;
872
873	handler->super.on_event = process_stopping_on_event;
874	handler->super.destroy = process_stopping_destroy;
875	handler->task_enabling_breakpoint = proc;
876	handler->breakpoint_being_enabled = sbp;
877	handler->on_all_stopped = as;
878	handler->keep_stepping_p = ks;
879	handler->ugly_workaround_p = uw;
880
881	install_event_handler(proc->leader, &handler->super);
882
883	if (each_task(proc->leader, NULL, &send_sigstop,
884		      &handler->pids) != NULL) {
885		destroy_event_handler(proc);
886		return -1;
887	}
888
889	/* And deliver the first fake event, in case all the
890	 * conditions are already fulfilled.  */
891	Event ev = {
892		.type = EVENT_NONE,
893		.proc = proc,
894	};
895	process_stopping_on_event(&handler->super, &ev);
896
897	return 0;
898}
899
900void
901continue_after_breakpoint(struct process *proc, struct breakpoint *sbp)
902{
903	debug(DEBUG_PROCESS,
904	      "continue_after_breakpoint: pid=%d, addr=%p",
905	      proc->pid, sbp->addr);
906
907	set_instruction_pointer(proc, sbp->addr);
908
909	if (sbp->enabled == 0) {
910		continue_process(proc->pid);
911	} else if (process_install_stopping_handler
912			(proc, sbp, NULL, NULL, NULL) < 0) {
913		perror("process_stopping_handler_create");
914		/* Carry on not bothering to re-enable.  */
915		continue_process(proc->pid);
916	}
917}
918
/**
 * Ltrace exit.  When we are about to exit, we have to go through all
 * the processes, stop them all, remove all the breakpoints, and then
 * detach the processes that we attached to using -p.  If we left the
 * other tasks running, they might hit stray return breakpoints and
 * produce artifacts, so we better stop everyone, even if it's a bit
 * of extra work.
 */
struct ltrace_exiting_handler
{
	/* Generic handler part.  It has to stay the first member:
	 * the callbacks cast the event_handler pointer back to this
	 * structure.  */
	struct event_handler super;
	/* Per-task SIGSTOP bookkeeping, filled in by send_sigstop.  */
	struct pid_set pids;
};
932
933static Event *
934ltrace_exiting_on_event(struct event_handler *super, Event *event)
935{
936	struct ltrace_exiting_handler *self = (void *)super;
937	struct process *task = event->proc;
938	struct process *leader = task->leader;
939
940	debug(DEBUG_PROCESS,
941	      "ltrace_exiting_on_event: pid %d; event type %d",
942	      task->pid, event->type);
943
944	struct pid_task *task_info = get_task_info(&self->pids, task->pid);
945	handle_stopping_event(task_info, &event);
946
947	if (event != NULL && event->type == EVENT_BREAKPOINT)
948		undo_breakpoint(event, leader);
949
950	if (await_sigstop_delivery(&self->pids, task_info, event)
951	    && all_stops_accountable(&self->pids))
952		detach_process(leader);
953
954	/* Sink all non-exit events.  We are about to exit, so we
955	 * don't bother with queuing them. */
956	if (event_exit_or_none_p(event))
957		return event;
958
959	return NULL;
960}
961
962static void
963ltrace_exiting_destroy(struct event_handler *super)
964{
965	struct ltrace_exiting_handler *self = (void *)super;
966	free(self->pids.tasks);
967}
968
969static int
970ltrace_exiting_install_handler(struct process *proc)
971{
972	/* Only install to leader.  */
973	if (proc->leader != proc)
974		return 0;
975
976	/* Perhaps we are already installed, if the user passed
977	 * several -p options that are tasks of one process.  */
978	if (proc->event_handler != NULL
979	    && proc->event_handler->on_event == &ltrace_exiting_on_event)
980		return 0;
981
982	/* If stopping handler is already present, let it do the
983	 * work.  */
984	if (proc->event_handler != NULL) {
985		assert(proc->event_handler->on_event
986		       == &process_stopping_on_event);
987		struct process_stopping_handler *other
988			= (void *)proc->event_handler;
989		other->exiting = 1;
990		return 0;
991	}
992
993	struct ltrace_exiting_handler *handler
994		= calloc(sizeof(*handler), 1);
995	if (handler == NULL) {
996		perror("malloc exiting handler");
997	fatal:
998		/* XXXXXXXXXXXXXXXXXXX fixme */
999		return -1;
1000	}
1001
1002	handler->super.on_event = ltrace_exiting_on_event;
1003	handler->super.destroy = ltrace_exiting_destroy;
1004	install_event_handler(proc->leader, &handler->super);
1005
1006	if (each_task(proc->leader, NULL, &send_sigstop,
1007		      &handler->pids) != NULL)
1008		goto fatal;
1009
1010	return 0;
1011}
1012
/*
 * When the traced process vforks, it's suspended until the child
 * process calls _exit or exec*.  In the meantime, the two share the
 * address space.
 *
 * The child process should only ever call _exit or exec*, but we
 * can't count on that (it's not the role of ltrace to police, but to
 * observe).  In any case, we will _at least_ have to deal with
 * removal of vfork return breakpoint (which we have to smuggle back
 * in, so that the parent can see it, too), and introduction of exec*
 * return breakpoint.  Since we already have both breakpoint actions
 * to deal with, we might as well support it all.
 *
 * The gist is that we pretend that the child is in a thread group
 * with its parent, and handle it as a multi-threaded case, with the
 * exception that we know that the parent is blocked, and don't
 * attempt to stop it.  When the child execs, we undo the setup.
 */

struct process_vfork_handler
{
	/* Generic handler part.  It has to stay the first member:
	 * process_vfork_on_event casts the event_handler pointer back
	 * to this structure.  */
	struct event_handler super;
	/* Address of the first breakpoint that the child hit while
	 * this handler was installed, or 0 if there was none yet.  It
	 * is re-inserted into the parent when the child exits or
	 * execs.  */
	arch_addr_t bp_addr;
};
1037
1038static Event *
1039process_vfork_on_event(struct event_handler *super, Event *event)
1040{
1041	debug(DEBUG_PROCESS,
1042	      "process_vfork_on_event: pid %d; event type %d",
1043	      event->proc->pid, event->type);
1044
1045	struct process_vfork_handler *self = (void *)super;
1046	assert(self != NULL);
1047
1048	switch (event->type) {
1049	case EVENT_BREAKPOINT:
1050		/* Remember the vfork return breakpoint.  */
1051		if (self->bp_addr == 0)
1052			self->bp_addr = event->e_un.brk_addr;
1053		break;
1054
1055	case EVENT_EXIT:
1056	case EVENT_EXIT_SIGNAL:
1057	case EVENT_EXEC:
1058		/* Smuggle back in the vfork return breakpoint, so
1059		 * that our parent can trip over it once again.  */
1060		if (self->bp_addr != 0) {
1061			struct breakpoint **found
1062				= DICT_FIND(event->proc->leader->breakpoints,
1063					    &self->bp_addr,
1064					    struct breakpoint *);
1065			if (found != NULL)
1066				assert((*found)->libsym == NULL);
1067			/* We don't mind failing that, it's not a big
1068			 * deal to not display one extra vfork return.  */
1069			insert_breakpoint(event->proc->parent,
1070					  self->bp_addr, NULL);
1071		}
1072
1073		continue_process(event->proc->parent->pid);
1074
1075		/* Remove the leader that we artificially set up
1076		 * earlier.  */
1077		change_process_leader(event->proc, event->proc);
1078		destroy_event_handler(event->proc);
1079
1080	default:
1081		;
1082	}
1083
1084	return event;
1085}
1086
1087void
1088continue_after_vfork(struct process *proc)
1089{
1090	debug(DEBUG_PROCESS, "continue_after_vfork: pid=%d", proc->pid);
1091	struct process_vfork_handler *handler = calloc(sizeof(*handler), 1);
1092	if (handler == NULL) {
1093		perror("malloc vfork handler");
1094		/* Carry on not bothering to treat the process as
1095		 * necessary.  */
1096		continue_process(proc->parent->pid);
1097		return;
1098	}
1099
1100	/* We must set up custom event handler, so that we see
1101	 * exec/exit events for the task itself.  */
1102	handler->super.on_event = process_vfork_on_event;
1103	install_event_handler(proc, &handler->super);
1104
1105	/* Make sure that the child is sole thread.  */
1106	assert(proc->leader == proc);
1107	assert(proc->next == NULL || proc->next->leader != proc);
1108
1109	/* Make sure that the child's parent is properly set up.  */
1110	assert(proc->parent != NULL);
1111	assert(proc->parent->leader != NULL);
1112
1113	change_process_leader(proc, proc->parent->leader);
1114}
1115
1116static int
1117is_mid_stopping(struct process *proc)
1118{
1119	return proc != NULL
1120		&& proc->event_handler != NULL
1121		&& proc->event_handler->on_event == &process_stopping_on_event;
1122}
1123
1124void
1125continue_after_syscall(struct process *proc, int sysnum, int ret_p)
1126{
1127	/* Don't continue if we are mid-stopping.  */
1128	if (ret_p && (is_mid_stopping(proc) || is_mid_stopping(proc->leader))) {
1129		debug(DEBUG_PROCESS,
1130		      "continue_after_syscall: don't continue %d",
1131		      proc->pid);
1132		return;
1133	}
1134	continue_process(proc->pid);
1135}
1136
1137void
1138continue_after_exec(struct process *proc)
1139{
1140	continue_process(proc->pid);
1141
1142	/* After the exec, we expect to hit the first executable
1143	 * instruction.
1144	 *
1145	 * XXX TODO It would be nice to have this removed, but then we
1146	 * need to do that also for initial call to wait_for_proc in
1147	 * execute_program.  In that case we could generate a
1148	 * EVENT_FIRST event or something, or maybe this could somehow
1149	 * be rolled into EVENT_NEW.  */
1150	wait_for_proc(proc->pid);
1151	continue_process(proc->pid);
1152}
1153
1154/* If ltrace gets SIGINT, the processes directly or indirectly run by
1155 * ltrace get it too.  We just have to wait long enough for the signal
1156 * to be delivered and the process terminated, which we notice and
1157 * exit ltrace, too.  So there's not much we need to do there.  We
1158 * want to keep tracing those processes as usual, in case they just
1159 * SIG_IGN the SIGINT to do their shutdown etc.
1160 *
1161 * For processes ran on the background, we want to install an exit
1162 * handler that stops all the threads, removes all breakpoints, and
1163 * detaches.
1164 */
1165void
1166os_ltrace_exiting(void)
1167{
1168	struct opt_p_t *it;
1169	for (it = opt_p; it != NULL; it = it->next) {
1170		struct process *proc = pid2proc(it->pid);
1171		if (proc == NULL || proc->leader == NULL)
1172			continue;
1173		if (ltrace_exiting_install_handler(proc->leader) < 0)
1174			fprintf(stderr,
1175				"Couldn't install exiting handler for %d.\n",
1176				proc->pid);
1177	}
1178}
1179
/* Signal-handler entry point for exiting.
 *
 * If the linux_in_waitpid flag is set, run os_ltrace_exiting right
 * away and return 1.  Otherwise do nothing and return 0.  */
int
os_ltrace_exiting_sighandler(void)
{
	extern int linux_in_waitpid;

	if (!linux_in_waitpid)
		return 0;

	os_ltrace_exiting();
	return 1;
}
1190
1191size_t
1192umovebytes(struct process *proc, void *addr, void *laddr, size_t len)
1193{
1194
1195	union {
1196		long a;
1197		char c[sizeof(long)];
1198	} a;
1199	int started = 0;
1200	size_t offset = 0, bytes_read = 0;
1201
1202	while (offset < len) {
1203		a.a = ptrace(PTRACE_PEEKTEXT, proc->pid, addr + offset, 0);
1204		if (a.a == -1 && errno) {
1205			if (started && errno == EIO)
1206				return bytes_read;
1207			else
1208				return -1;
1209		}
1210		started = 1;
1211
1212		if (len - offset >= sizeof(long)) {
1213			memcpy(laddr + offset, &a.c[0], sizeof(long));
1214			bytes_read += sizeof(long);
1215		}
1216		else {
1217			memcpy(laddr + offset, &a.c[0], len - offset);
1218			bytes_read += (len - offset);
1219		}
1220		offset += sizeof(long);
1221	}
1222
1223	return bytes_read;
1224}
1225