plt.c revision bac2da505ee174b7fb984b975c5938f88f0dbab2
1#include <gelf.h>
2#include <sys/ptrace.h>
3#include <errno.h>
4#include <error.h>
5#include <inttypes.h>
6#include <assert.h>
7#include <string.h>
8
9#include "proc.h"
10#include "common.h"
11#include "library.h"
12#include "breakpoint.h"
13#include "linux-gnu/trace.h"
14#include "backend.h"
15
16/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
17 * new-style "secure" PLT.  We can tell one from the other by the
18 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
19 * otherwise it's secure.
20 *
21 * BSS PLT works the same way as most architectures: the .plt section
22 * contains trampolines and we put breakpoints to those.  If not
23 * prelinked, .plt contains zeroes, and dynamic linker fills in the
24 * initial set of trampolines, which means that we need to delay
25 * enabling breakpoints until after binary entry point is hit.
26 * Additionally, after first call, dynamic linker updates .plt with
27 * branch to resolved address.  That means that on first hit, we must
28 * do something similar to the PPC64 gambit described below.
29 *
30 * With secure PLT, the .plt section doesn't contain instructions but
31 * addresses.  The real PLT table is stored in .text.  Addresses of
32 * those PLT entries can be computed, and apart from the fact that
33 * they are in .text, they are ordinary PLT entries.
34 *
35 * 64-bit PPC is more involved.  Program linker creates for each
36 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
37 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
38 * dispatch: it loads an address of a function to call from the
39 * section .plt, and branches.  PLT entries themselves are essentially
40 * a curried call to the resolver.  When the symbol is resolved, the
41 * resolver updates the value stored in .plt, and the next time
42 * around, the stub calls the library function directly.  So we make
43 * at most one trip (none if the binary is prelinked) through each PLT
44 * entry, and correspondingly that is useless as a breakpoint site.
45 *
46 * Note the three confusing terms: stubs (that play the role of PLT
47 * entries), PLT entries, .plt section.
48 *
49 * We first check symbol tables and see if we happen to have stub
50 * symbols available.  If yes we just put breakpoints to those, and
51 * treat them as usual breakpoints.  The only tricky part is realizing
52 * that there can be more than one breakpoint per symbol.
53 *
54 * The case that we don't have the stub symbols available is harder.
55 * The following scheme uses two kinds of PLT breakpoints: unresolved
56 * and resolved (to some address).  When the process starts (or when
57 * we attach), we distribute unresolved PLT breakpoints to the PLT
58 * entries (not stubs).  Then we look in .plt, and for each entry
59 * whose value is different than the corresponding PLT entry address,
60 * we assume it was already resolved, and convert the breakpoint to
61 * resolved.  We also rewrite the resolved value in .plt back to the
62 * PLT address.
63 *
64 * When a PLT entry hits a resolved breakpoint (which happens because
65 * we rewrite .plt with the original unresolved addresses), we move
66 * the instruction pointer to the corresponding address and continue
67 * the process as if nothing happened.
68 *
69 * When unresolved PLT entry is called for the first time, we need to
70 * catch the new value that the resolver will write to a .plt slot.
71 * We also need to prevent another thread from racing through and
72 * taking the branch without ltrace noticing.  So when unresolved PLT
73 * entry hits, we have to stop all threads.  We then single-step
74 * through the resolver, until the .plt slot changes.  When it does,
75 * we treat it the same way as above: convert the PLT breakpoint to
76 * resolved, and rewrite the .plt value back to PLT address.  We then
77 * start all threads again.
78 *
 * As an optimization, we remember the code address at which the
 * resolver updated the .plt slot, and put a breakpoint right after
 * it.  The next time around (when the next PLT entry is to be
 * resolved), instead of single-stepping through half the dynamic
 * linker, we just let the thread run and hit this breakpoint.  When
 * it hits, we know the PLT entry was resolved.
84 *
85 * XXX TODO If we have hardware watch point, we might put a read watch
86 * on .plt slot, and discover the offenders this way.  I don't know
87 * the details, but I assume at most a handful (like, one or two, if
88 * available at all) addresses may be watched at a time, and thus this
89 * would be used as an amendment of the above rather than full-on
90 * solution to PLT tracing on PPC.
91 */
92
/* Byte distance between consecutive PLT stubs, as used by
 * arch_plt_sym_val to turn a PLT index into a stub address.  */
#define PPC_PLT_STUB_SIZE 16	/* PPC32 secure PLT */
#define PPC64_PLT_STUB_SIZE 8	/* PPC64 without stub symbols; XXX unverified */
95
/* Report whether ltrace itself was compiled for a 64-bit PowerPC host.
 * Returns 1 when __powerpc64__ is defined at build time, 0 otherwise.
 * Declared with (void) so that the definition is a real prototype and
 * stray arguments are diagnosed by the compiler.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
105
106int
107read_target_4(struct Process *proc, arch_addr_t addr, uint32_t *lp)
108{
109	unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
110	if (l == -1UL && errno)
111		return -1;
112#ifdef __powerpc64__
113	l >>= 32;
114#endif
115	*lp = l;
116	return 0;
117}
118
119static int
120read_target_8(struct Process *proc, arch_addr_t addr, uint64_t *lp)
121{
122	unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
123	if (l == -1UL && errno)
124		return -1;
125	if (host_powerpc64()) {
126		*lp = l;
127	} else {
128		unsigned long l2 = ptrace(PTRACE_PEEKTEXT, proc->pid,
129					  addr + 4, 0);
130		if (l2 == -1UL && errno)
131			return -1;
132		*lp = ((uint64_t)l << 32) | l2;
133	}
134	return 0;
135}
136
137int
138read_target_long(struct Process *proc, arch_addr_t addr, uint64_t *lp)
139{
140	if (proc->e_machine == EM_PPC) {
141		uint32_t w;
142		int ret = read_target_4(proc, addr, &w);
143		if (ret >= 0)
144			*lp = (uint64_t)w;
145		return ret;
146	} else {
147		return read_target_8(proc, addr, lp);
148	}
149}
150
151static enum callback_status
152reenable_breakpoint(struct Process *proc, struct breakpoint *bp, void *data)
153{
154	/* We don't need to re-enable non-PLT breakpoints and
155	 * breakpoints that are not PPC32 BSS unprelinked.  */
156	if (bp->libsym == NULL
157	    || bp->libsym->plt_type == LS_TOPLT_NONE
158	    || bp->libsym->lib->arch.bss_plt_prelinked != 0)
159		return CBS_CONT;
160
161	debug(DEBUG_PROCESS, "pid=%d reenable_breakpoint %s",
162	      proc->pid, breakpoint_name(bp));
163
164	assert(proc->e_machine == EM_PPC);
165	uint64_t l;
166	if (read_target_8(proc, bp->addr, &l) < 0) {
167		error(0, errno, "couldn't read PLT value for %s(%p)",
168		      breakpoint_name(bp), bp->addr);
169		return CBS_CONT;
170	}
171
172	/* XXX double cast  */
173	bp->libsym->arch.plt_slot_addr = (GElf_Addr)(uintptr_t)bp->addr;
174
175	/* If necessary, re-enable the breakpoint if it was
176	 * overwritten by the dynamic linker.  */
177	union {
178		uint32_t insn;
179		char buf[4];
180	} u = { .buf = BREAKPOINT_VALUE };
181	if (l >> 32 == u.insn)
182		debug(DEBUG_PROCESS, "pid=%d, breakpoint still present"
183		      " at %p, avoiding reenable", proc->pid, bp->addr);
184	else
185		enable_breakpoint(proc, bp);
186
187	bp->libsym->arch.resolved_value = l;
188
189	return CBS_CONT;
190}
191
192void
193arch_dynlink_done(struct Process *proc)
194{
195	/* On PPC32, .plt of objects that use BSS PLT are overwritten
196	 * by the dynamic linker (unless that object was prelinked).
197	 * We need to re-enable breakpoints in those objects.  */
198	proc_each_breakpoint(proc, NULL, reenable_breakpoint, NULL);
199}
200
201GElf_Addr
202arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
203{
204	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
205		assert(lte->arch.plt_stub_vma != 0);
206		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
207
208	} else if (lte->ehdr.e_machine == EM_PPC) {
209		return rela->r_offset;
210
211	} else {
212		/* If we get here, we don't have stub symbols.  In
213		 * that case we put brakpoints to PLT entries the same
214		 * as the PPC32 secure PLT case does.  */
215		assert(lte->arch.plt_stub_vma != 0);
216		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
217	}
218}
219
220/* This entry point is called when ltelf is not available
221 * anymore--during runtime.  At that point we don't have to concern
222 * ourselves with bias, as the values in OPD have been resolved
223 * already.  */
224int
225arch_translate_address_dyn(struct Process *proc,
226			   arch_addr_t addr, arch_addr_t *ret)
227{
228	if (proc->e_machine == EM_PPC64) {
229		uint64_t value;
230		if (read_target_8(proc, addr, &value) < 0) {
231			error(0, errno, "dynamic .opd translation of %p", addr);
232			return -1;
233		}
234		/* XXX The double cast should be removed when
235		 * arch_addr_t becomes integral type.  */
236		*ret = (arch_addr_t)(uintptr_t)value;
237		return 0;
238	}
239
240	*ret = addr;
241	return 0;
242}
243
244int
245arch_translate_address(struct ltelf *lte,
246		       arch_addr_t addr, arch_addr_t *ret)
247{
248	if (lte->ehdr.e_machine == EM_PPC64) {
249		/* XXX The double cast should be removed when
250		 * arch_addr_t becomes integral type.  */
251		GElf_Xword offset
252			= (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base;
253		uint64_t value;
254		if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
255			error(0, 0, "static .opd translation of %p: %s", addr,
256			      elf_errmsg(-1));
257			return -1;
258		}
259		*ret = (arch_addr_t)(uintptr_t)(value + lte->bias);
260		return 0;
261	}
262
263	*ret = addr;
264	return 0;
265}
266
267static int
268load_opd_data(struct ltelf *lte, struct library *lib)
269{
270	Elf_Scn *sec;
271	GElf_Shdr shdr;
272	if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0) {
273	fail:
274		fprintf(stderr, "couldn't find .opd data\n");
275		return -1;
276	}
277
278	lte->arch.opd_data = elf_rawdata(sec, NULL);
279	if (lte->arch.opd_data == NULL)
280		goto fail;
281
282	lte->arch.opd_base = shdr.sh_addr + lte->bias;
283	lte->arch.opd_size = shdr.sh_size;
284
285	return 0;
286}
287
288void *
289sym2addr(struct Process *proc, struct library_symbol *sym)
290{
291	return sym->enter_addr;
292}
293
294static GElf_Addr
295get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
296{
297	Elf_Scn *ppcgot_sec = NULL;
298	GElf_Shdr ppcgot_shdr;
299	if (ppcgot != 0
300	    && elf_get_section_covering(lte, ppcgot,
301					&ppcgot_sec, &ppcgot_shdr) < 0)
302		error(0, 0, "DT_PPC_GOT=%#"PRIx64", but no such section found",
303		      ppcgot);
304
305	if (ppcgot_sec != NULL) {
306		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
307		if (data == NULL || data->d_size < 8 ) {
308			error(0, 0, "couldn't read GOT data");
309		} else {
310			// where PPCGOT begins in .got
311			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
312			assert(offset % 4 == 0);
313			uint32_t glink_vma;
314			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
315				error(0, 0, "couldn't read glink VMA address"
316				      " at %zd@GOT", offset);
317				return 0;
318			}
319			if (glink_vma != 0) {
320				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
321				      glink_vma);
322				return (GElf_Addr)glink_vma;
323			}
324		}
325	}
326
327	if (plt_data != NULL) {
328		uint32_t glink_vma;
329		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
330			error(0, 0, "couldn't read glink VMA address");
331			return 0;
332		}
333		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
334		return (GElf_Addr)glink_vma;
335	}
336
337	return 0;
338}
339
340static int
341load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
342{
343	Elf_Scn *scn;
344	GElf_Shdr shdr;
345	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
346	    || scn == NULL) {
347	fail:
348		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
349		      elf_errmsg(-1));
350		return -1;
351	}
352
353	Elf_Data *data = elf_loaddata(scn, &shdr);
354	if (data == NULL)
355		goto fail;
356
357	size_t j;
358	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
359		GElf_Dyn dyn;
360		if (gelf_getdyn(data, j, &dyn) == NULL)
361			goto fail;
362
363		if(dyn.d_tag == tag) {
364			*valuep = dyn.d_un.d_ptr;
365			return 0;
366		}
367	}
368
369	return -1;
370}
371
372static int
373load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
374{
375	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
376}
377
378static int
379load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp)
380{
381	return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp);
382}
383
384static int
385nonzero_data(Elf_Data *data)
386{
387	/* We are not supposed to get here if there's no PLT.  */
388	assert(data != NULL);
389
390	unsigned char *buf = data->d_buf;
391	if (buf == NULL)
392		return 0;
393
394	size_t i;
395	for (i = 0; i < data->d_size; ++i)
396		if (buf[i] != 0)
397			return 1;
398	return 0;
399}
400
401int
402arch_elf_init(struct ltelf *lte, struct library *lib)
403{
404	if (lte->ehdr.e_machine == EM_PPC64
405	    && load_opd_data(lte, lib) < 0)
406		return -1;
407
408	lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);
409
410	/* For PPC32 BSS, it is important whether the binary was
411	 * prelinked.  If .plt section is NODATA, or if it contains
412	 * zeroes, then this library is not prelinked, and we need to
413	 * delay breakpoints.  */
414	if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt)
415		lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data);
416	else
417		/* For cases where it's irrelevant, initialize the
418		 * value to something conspicuous.  */
419		lib->arch.bss_plt_prelinked = -1;
420
421	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
422		GElf_Addr ppcgot;
423		if (load_ppcgot(lte, &ppcgot) < 0) {
424			error(0, 0, "couldn't find DT_PPC_GOT");
425			return -1;
426		}
427		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
428
429		assert (lte->relplt_size % 12 == 0);
430		size_t count = lte->relplt_size / 12; // size of RELA entry
431		lte->arch.plt_stub_vma = glink_vma
432			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
433		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
434
435	} else if (lte->ehdr.e_machine == EM_PPC64) {
436		GElf_Addr glink_vma;
437		if (load_ppc64_glink(lte, &glink_vma) < 0) {
438			error(0, 0, "couldn't find DT_PPC64_GLINK");
439			return -1;
440		}
441
442		/* The first glink stub starts at offset 32.  */
443		lte->arch.plt_stub_vma = glink_vma + 32;
444	}
445
446	/* On PPC64, look for stub symbols in symbol table.  These are
447	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
448	if (lte->ehdr.e_machine == EM_PPC64
449	    && lte->symtab != NULL && lte->strtab != NULL) {
450
451		/* N.B. We can't simply skip the symbols that we fail
452		 * to read or malloc.  There may be more than one stub
453		 * per symbol name, and if we failed in one but
454		 * succeeded in another, the PLT enabling code would
455		 * have no way to tell that something is missing.  We
456		 * could work around that, of course, but it doesn't
457		 * seem worth the trouble.  So if anything fails, we
458		 * just pretend that we don't have stub symbols at
459		 * all, as if the binary is stripped.  */
460
461		size_t i;
462		for (i = 0; i < lte->symtab_count; ++i) {
463			GElf_Sym sym;
464			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
465				struct library_symbol *sym, *next;
466			fail:
467				for (sym = lte->arch.stubs; sym != NULL; ) {
468					next = sym->next;
469					library_symbol_destroy(sym);
470					free(sym);
471					sym = next;
472				}
473				lte->arch.stubs = NULL;
474				break;
475			}
476
477			const char *name = lte->strtab + sym.st_name;
478
479#define STUBN ".plt_call."
480			if ((name = strstr(name, STUBN)) == NULL)
481				continue;
482			name += sizeof(STUBN) - 1;
483#undef STUBN
484
485			size_t len;
486			const char *ver = strchr(name, '@');
487			if (ver != NULL) {
488				len = ver - name;
489
490			} else {
491				/* If there is "+" at all, check that
492				 * the symbol name ends in "+0".  */
493				const char *add = strrchr(name, '+');
494				if (add != NULL) {
495					assert(strcmp(add, "+0") == 0);
496					len = add - name;
497				} else {
498					len = strlen(name);
499				}
500			}
501
502			char *sym_name = strndup(name, len);
503			struct library_symbol *libsym = malloc(sizeof(*libsym));
504			if (sym_name == NULL || libsym == NULL) {
505			fail2:
506				free(sym_name);
507				free(libsym);
508				goto fail;
509			}
510
511			/* XXX The double cast should be removed when
512			 * arch_addr_t becomes integral type.  */
513			arch_addr_t addr = (arch_addr_t)
514				(uintptr_t)sym.st_value + lte->bias;
515			if (library_symbol_init(libsym, addr, sym_name, 1,
516						LS_TOPLT_EXEC) < 0)
517				goto fail2;
518			libsym->arch.type = PPC64_PLT_STUB;
519			libsym->next = lte->arch.stubs;
520			lte->arch.stubs = libsym;
521		}
522	}
523
524	return 0;
525}
526
527static int
528read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp)
529{
530	/* On PPC64, we read from .plt, which contains 8 byte
531	 * addresses.  On PPC32 we read from .plt, which contains 4
532	 * byte instructions, but the PLT is two instructions, and
533	 * either can change.  */
534	uint64_t l;
535	/* XXX double cast.  */
536	if (read_target_8(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) {
537		error(0, errno, "ptrace .plt slot value @%#" PRIx64, addr);
538		return -1;
539	}
540
541	*valp = (GElf_Addr)l;
542	return 0;
543}
544
545static int
546unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value)
547{
548	/* We only modify plt_entry[0], which holds the resolved
549	 * address of the routine.  We keep the TOC and environment
550	 * pointers intact.  Hence the only adjustment that we need to
551	 * do is to IP.  */
552	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
553		error(0, errno, "unresolve .plt slot");
554		return -1;
555	}
556	return 0;
557}
558
559static void
560mark_as_resolved(struct library_symbol *libsym, GElf_Addr value)
561{
562	libsym->arch.type = PPC_PLT_RESOLVED;
563	libsym->arch.resolved_value = value;
564}
565
566enum plt_status
567arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
568		       const char *a_name, GElf_Rela *rela, size_t ndx,
569		       struct library_symbol **ret)
570{
571	if (lte->ehdr.e_machine == EM_PPC)
572		return plt_default;
573
574	/* PPC64.  If we have stubs, we return a chain of breakpoint
575	 * sites, one for each stub that corresponds to this PLT
576	 * entry.  */
577	struct library_symbol *chain = NULL;
578	struct library_symbol **symp;
579	for (symp = &lte->arch.stubs; *symp != NULL; ) {
580		struct library_symbol *sym = *symp;
581		if (strcmp(sym->name, a_name) != 0) {
582			symp = &(*symp)->next;
583			continue;
584		}
585
586		/* Re-chain the symbol from stubs to CHAIN.  */
587		*symp = sym->next;
588		sym->next = chain;
589		chain = sym;
590	}
591
592	if (chain != NULL) {
593		*ret = chain;
594		return plt_ok;
595	}
596
597	/* We don't have stub symbols.  Find corresponding .plt slot,
598	 * and check whether it contains the corresponding PLT address
599	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
600	 * want read this from ELF file, but from process image.  That
601	 * makes a difference if we are attaching to a running
602	 * process.  */
603
604	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
605	GElf_Addr plt_slot_addr = rela->r_offset;
606	assert(plt_slot_addr >= lte->plt_addr
607	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
608
609	GElf_Addr plt_slot_value;
610	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
611		return plt_fail;
612
613	char *name = strdup(a_name);
614	struct library_symbol *libsym = malloc(sizeof(*libsym));
615	if (name == NULL || libsym == NULL) {
616		error(0, errno, "allocation for .plt slot");
617	fail:
618		free(name);
619		free(libsym);
620		return plt_fail;
621	}
622
623	/* XXX The double cast should be removed when
624	 * arch_addr_t becomes integral type.  */
625	if (library_symbol_init(libsym,
626				(arch_addr_t)(uintptr_t)plt_entry_addr,
627				name, 1, LS_TOPLT_EXEC) < 0)
628		goto fail;
629	libsym->arch.plt_slot_addr = plt_slot_addr;
630
631	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
632		libsym->arch.type = PPC_PLT_UNRESOLVED;
633		libsym->arch.resolved_value = plt_entry_addr;
634
635	} else {
636		/* Unresolve the .plt slot.  If the binary was
637		 * prelinked, this makes the code invalid, because in
638		 * case of prelinked binary, the dynamic linker
639		 * doesn't update .plt[0] and .plt[1] with addresses
640		 * of the resover.  But we don't care, we will never
641		 * need to enter the resolver.  That just means that
642		 * we have to un-un-resolve this back before we
643		 * detach.  */
644
645		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) {
646			library_symbol_destroy(libsym);
647			goto fail;
648		}
649		mark_as_resolved(libsym, plt_slot_value);
650	}
651
652	*ret = libsym;
653	return plt_ok;
654}
655
656void
657arch_elf_destroy(struct ltelf *lte)
658{
659	struct library_symbol *sym;
660	for (sym = lte->arch.stubs; sym != NULL; ) {
661		struct library_symbol *next = sym->next;
662		library_symbol_destroy(sym);
663		free(sym);
664		sym = next;
665	}
666}
667
668static void
669dl_plt_update_bp_on_hit(struct breakpoint *bp, struct Process *proc)
670{
671	debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)",
672	      proc->pid, breakpoint_name(bp), bp->addr);
673	struct process_stopping_handler *self = proc->arch.handler;
674	assert(self != NULL);
675
676	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
677	GElf_Addr value;
678	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
679		return;
680
681	/* On PPC64, we rewrite the slot value.  */
682	if (proc->e_machine == EM_PPC64)
683		unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
684				   libsym->arch.resolved_value);
685	/* We mark the breakpoint as resolved on both arches.  */
686	mark_as_resolved(libsym, value);
687
688	/* cb_on_all_stopped looks if HANDLER is set to NULL as a way
689	 * to check that this was run.  It's an error if it
690	 * wasn't.  */
691	proc->arch.handler = NULL;
692
693	breakpoint_turn_off(bp, proc);
694}
695
696static void
697cb_on_all_stopped(struct process_stopping_handler *self)
698{
699	/* Put that in for dl_plt_update_bp_on_hit to see.  */
700	assert(self->task_enabling_breakpoint->arch.handler == NULL);
701	self->task_enabling_breakpoint->arch.handler = self;
702
703	linux_ptrace_disable_and_continue(self);
704}
705
706static enum callback_status
707cb_keep_stepping_p(struct process_stopping_handler *self)
708{
709	struct Process *proc = self->task_enabling_breakpoint;
710	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
711
712	GElf_Addr value;
713	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
714		return CBS_FAIL;
715
716	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
717	 * the PLT entry value.  */
718	if (value == libsym->arch.resolved_value)
719		return CBS_CONT;
720
721	debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64,
722	      proc->pid, value);
723
724	/* The .plt slot got resolved!  We can migrate the breakpoint
725	 * to RESOLVED and stop single-stepping.  */
726	if (proc->e_machine == EM_PPC64
727	    && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
728				  libsym->arch.resolved_value) < 0)
729		return CBS_FAIL;
730
731	/* Resolving on PPC64 consists of overwriting a doubleword in
732	 * .plt.  That doubleword is than read back by a stub, and
733	 * jumped on.  Hopefully we can assume that double word update
734	 * is done on a single place only, as it contains a final
735	 * address.  We still need to look around for any sync
736	 * instruction, but essentially it is safe to optimize away
737	 * the single stepping next time and install a post-update
738	 * breakpoint.
739	 *
740	 * The situation on PPC32 BSS is more complicated.  The
741	 * dynamic linker here updates potentially several
742	 * instructions (XXX currently we assume two) and the rules
743	 * are more complicated.  Sometimes it's enough to adjust just
744	 * one of the addresses--the logic for generating optimal
745	 * dispatch depends on relative addresses of the .plt entry
746	 * and the jump destination.  We can't assume that the some
747	 * instruction block does the update every time.  So on PPC32,
748	 * we turn the optimization off and just step through it each
749	 * time.  */
750	if (proc->e_machine == EM_PPC)
751		goto done;
752
753	/* Install breakpoint to the address where the change takes
754	 * place.  If we fail, then that just means that we'll have to
755	 * singlestep the next time around as well.  */
756	struct Process *leader = proc->leader;
757	if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
758		goto done;
759
760	/* We need to install to the next instruction.  ADDR points to
761	 * a store instruction, so moving the breakpoint one
762	 * instruction forward is safe.  */
763	arch_addr_t addr = get_instruction_pointer(proc) + 4;
764	leader->arch.dl_plt_update_bp = insert_breakpoint(proc, addr, NULL);
765	if (leader->arch.dl_plt_update_bp == NULL)
766		goto done;
767
768	static struct bp_callbacks dl_plt_update_cbs = {
769		.on_hit = dl_plt_update_bp_on_hit,
770	};
771	leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;
772
773	/* Turn it off for now.  We will turn it on again when we hit
774	 * the PLT entry that needs this.  */
775	breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);
776
777done:
778	mark_as_resolved(libsym, value);
779
780	return CBS_STOP;
781}
782
783static void
784jump_to_entry_point(struct Process *proc, struct breakpoint *bp)
785{
786	/* XXX The double cast should be removed when
787	 * arch_addr_t becomes integral type.  */
788	arch_addr_t rv = (arch_addr_t)
789		(uintptr_t)bp->libsym->arch.resolved_value;
790	set_instruction_pointer(proc, rv);
791}
792
793static void
794ppc_plt_bp_continue(struct breakpoint *bp, struct Process *proc)
795{
796	switch (bp->libsym->arch.type) {
797		struct Process *leader;
798		void (*on_all_stopped)(struct process_stopping_handler *);
799		enum callback_status (*keep_stepping_p)
800			(struct process_stopping_handler *);
801
802	case PPC_DEFAULT:
803		assert(proc->e_machine == EM_PPC);
804		assert(bp->libsym != NULL);
805		assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
806		/* fall-through */
807
808	case PPC_PLT_UNRESOLVED:
809		on_all_stopped = NULL;
810		keep_stepping_p = NULL;
811		leader = proc->leader;
812
813		if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
814		    && breakpoint_turn_on(leader->arch.dl_plt_update_bp,
815					  proc) >= 0)
816			on_all_stopped = cb_on_all_stopped;
817		else
818			keep_stepping_p = cb_keep_stepping_p;
819
820		if (process_install_stopping_handler
821		    (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
822			error(0, 0, "ppc_plt_bp_continue: couldn't install"
823			      " event handler");
824			continue_after_breakpoint(proc, bp);
825		}
826		return;
827
828	case PPC_PLT_RESOLVED:
829		if (proc->e_machine == EM_PPC) {
830			continue_after_breakpoint(proc, bp);
831			return;
832		}
833
834		jump_to_entry_point(proc, bp);
835		continue_process(proc->pid);
836		return;
837
838	case PPC64_PLT_STUB:
839		/* These should never hit here.  */
840		break;
841	}
842
843	assert(bp->libsym->arch.type != bp->libsym->arch.type);
844	abort();
845}
846
847/* When a process is in a PLT stub, it may have already read the data
848 * in .plt that we changed.  If we detach now, it will jump to PLT
849 * entry and continue to the dynamic linker, where it will SIGSEGV,
850 * because zeroth .plt slot is not filled in prelinked binaries, and
851 * the dynamic linker needs that data.  Moreover, the process may
852 * actually have hit the breakpoint already.  This functions tries to
853 * detect both cases and do any fix-ups necessary to mend this
854 * situation.  */
855static enum callback_status
856detach_task_cb(struct Process *task, void *data)
857{
858	struct breakpoint *bp = data;
859
860	if (get_instruction_pointer(task) == bp->addr) {
861		debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
862		      task->pid, bp->addr);
863		jump_to_entry_point(task, bp);
864		return CBS_CONT;
865	}
866
867	/* XXX There's still a window of several instructions where we
868	 * might catch the task inside a stub such that it has already
869	 * read destination address from .plt, but hasn't jumped yet,
870	 * thus avoiding the breakpoint.  */
871
872	return CBS_CONT;
873}
874
875static void
876ppc_plt_bp_retract(struct breakpoint *bp, struct Process *proc)
877{
878	/* On PPC64, we rewrite .plt with PLT entry addresses.  This
879	 * needs to be undone.  Unfortunately, the program may have
880	 * made decisions based on that value */
881	if (proc->e_machine == EM_PPC64
882	    && bp->libsym != NULL
883	    && bp->libsym->arch.type == PPC_PLT_RESOLVED) {
884		each_task(proc->leader, NULL, detach_task_cb, bp);
885		unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
886				   bp->libsym->arch.resolved_value);
887	}
888}
889
/* Architecture hook for library initialization.  Nothing is done for
 * LIB here.  */
void
arch_library_init(struct library *lib)
{
	(void)lib;
}
894
/* Architecture hook for library destruction.  Nothing is done for
 * LIB here.  */
void
arch_library_destroy(struct library *lib)
{
	(void)lib;
}
899
/* Architecture hook for cloning LIB into RETP.  Nothing is copied
 * here.  */
void
arch_library_clone(struct library *retp, struct library *lib)
{
	(void)retp;
	(void)lib;
}
904
905int
906arch_library_symbol_init(struct library_symbol *libsym)
907{
908	/* We set type explicitly in the code above, where we have the
909	 * necessary context.  This is for calls from ltrace-elf.c and
910	 * such.  */
911	libsym->arch.type = PPC_DEFAULT;
912	return 0;
913}
914
/* Architecture hook for library symbol destruction.  Nothing is done
 * for LIBSYM here.  */
void
arch_library_symbol_destroy(struct library_symbol *libsym)
{
	(void)libsym;
}
919
920int
921arch_library_symbol_clone(struct library_symbol *retp,
922			  struct library_symbol *libsym)
923{
924	retp->arch = libsym->arch;
925	return 0;
926}
927
928/* For some symbol types, we need to set up custom callbacks.  XXX we
929 * don't need PROC here, we can store the data in BP if it is of
930 * interest to us.  */
931int
932arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
933{
934	/* Artificial and entry-point breakpoints are plain.  */
935	if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
936		return 0;
937
938	/* On PPC, secure PLT and prelinked BSS PLT are plain.  */
939	if (proc->e_machine == EM_PPC
940	    && bp->libsym->lib->arch.bss_plt_prelinked != 0)
941		return 0;
942
943	/* On PPC64, stub PLT breakpoints are plain.  */
944	if (proc->e_machine == EM_PPC64
945	    && bp->libsym->arch.type == PPC64_PLT_STUB)
946		return 0;
947
948	static struct bp_callbacks cbs = {
949		.on_continue = ppc_plt_bp_continue,
950		.on_retract = ppc_plt_bp_retract,
951	};
952	breakpoint_set_callbacks(bp, &cbs);
953	return 0;
954}
955
/* Architecture hook for breakpoint destruction.  Nothing is done for
 * BP here.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
	(void)bp;
}
960
961int
962arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
963{
964	retp->arch = sbp->arch;
965	return 0;
966}
967
968int
969arch_process_init(struct Process *proc)
970{
971	proc->arch.dl_plt_update_bp = NULL;
972	proc->arch.handler = NULL;
973	return 0;
974}
975
/* Architecture hook for process destruction.  Nothing is done for
 * PROC here.  */
void
arch_process_destroy(struct Process *proc)
{
	(void)proc;
}
980
981int
982arch_process_clone(struct Process *retp, struct Process *proc)
983{
984	retp->arch = proc->arch;
985	return 0;
986}
987
/* Architecture hook invoked when PROC execs: its arch data is reset
 * the same way arch_process_init sets it up.  Returns the result of
 * arch_process_init.  */
int
arch_process_exec(struct Process *proc)
{
	int status = arch_process_init(proc);
	return status;
}
993