plt.c revision 7287166e8fd5949ffcf8eb1f3d378b5ea538915e
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2012,2013 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2004,2008,2009 Juan Cespedes
5 * Copyright (C) 2006 Paul Gilliam
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
20 * 02110-1301 USA
21 */
22
23#include <gelf.h>
24#include <sys/ptrace.h>
25#include <errno.h>
26#include <inttypes.h>
27#include <assert.h>
28#include <string.h>
29
30#include "proc.h"
31#include "common.h"
32#include "insn.h"
33#include "library.h"
34#include "breakpoint.h"
35#include "linux-gnu/trace.h"
36#include "backend.h"
37
38/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
39 * new-style "secure" PLT.  We can tell one from the other by the
40 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
41 * otherwise it's secure.
42 *
43 * BSS PLT works the same way as most architectures: the .plt section
44 * contains trampolines and we put breakpoints to those.  If not
45 * prelinked, .plt contains zeroes, and dynamic linker fills in the
46 * initial set of trampolines, which means that we need to delay
47 * enabling breakpoints until after binary entry point is hit.
48 * Additionally, after first call, dynamic linker updates .plt with
49 * branch to resolved address.  That means that on first hit, we must
50 * do something similar to the PPC64 gambit described below.
51 *
52 * With secure PLT, the .plt section doesn't contain instructions but
53 * addresses.  The real PLT table is stored in .text.  Addresses of
54 * those PLT entries can be computed, and apart from the fact that
55 * they are in .text, they are ordinary PLT entries.
56 *
57 * 64-bit PPC is more involved.  Program linker creates for each
58 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
59 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
60 * dispatch: it loads an address of a function to call from the
61 * section .plt, and branches.  PLT entries themselves are essentially
62 * a curried call to the resolver.  When the symbol is resolved, the
63 * resolver updates the value stored in .plt, and the next time
64 * around, the stub calls the library function directly.  So we make
65 * at most one trip (none if the binary is prelinked) through each PLT
66 * entry, and correspondingly that is useless as a breakpoint site.
67 *
68 * Note the three confusing terms: stubs (that play the role of PLT
69 * entries), PLT entries, .plt section.
70 *
71 * We first check symbol tables and see if we happen to have stub
72 * symbols available.  If yes we just put breakpoints to those, and
73 * treat them as usual breakpoints.  The only tricky part is realizing
74 * that there can be more than one breakpoint per symbol.
75 *
76 * The case that we don't have the stub symbols available is harder.
77 * The following scheme uses two kinds of PLT breakpoints: unresolved
78 * and resolved (to some address).  When the process starts (or when
79 * we attach), we distribute unresolved PLT breakpoints to the PLT
80 * entries (not stubs).  Then we look in .plt, and for each entry
81 * whose value is different than the corresponding PLT entry address,
82 * we assume it was already resolved, and convert the breakpoint to
83 * resolved.  We also rewrite the resolved value in .plt back to the
84 * PLT address.
85 *
86 * When a PLT entry hits a resolved breakpoint (which happens because
87 * we rewrite .plt with the original unresolved addresses), we move
88 * the instruction pointer to the corresponding address and continue
89 * the process as if nothing happened.
90 *
91 * When unresolved PLT entry is called for the first time, we need to
92 * catch the new value that the resolver will write to a .plt slot.
93 * We also need to prevent another thread from racing through and
94 * taking the branch without ltrace noticing.  So when unresolved PLT
95 * entry hits, we have to stop all threads.  We then single-step
96 * through the resolver, until the .plt slot changes.  When it does,
97 * we treat it the same way as above: convert the PLT breakpoint to
98 * resolved, and rewrite the .plt value back to PLT address.  We then
99 * start all threads again.
100 *
101 * As an optimization, we remember the address where the address was
102 * resolved, and put a breakpoint there.  The next time around (when
103 * the next PLT entry is to be resolved), instead of single-stepping
104 * through half the dynamic linker, we just let the thread run and hit
105 * this breakpoint.  When it hits, we know the PLT entry was resolved.
106 *
107 * XXX TODO If we have hardware watch point, we might put a read watch
108 * on .plt slot, and discover the offenders this way.  I don't know
109 * the details, but I assume at most a handful (like, one or two, if
110 * available at all) addresses may be watched at a time, and thus this
111 * would be used as an amendment of the above rather than full-on
112 * solution to PLT tracing on PPC.
113 */
114
115#define PPC_PLT_STUB_SIZE 16
116#define PPC64_PLT_STUB_SIZE 8 //xxx
117
/* Report whether ltrace itself was compiled as 64-bit PowerPC code.
 * Returns 1 when built with __powerpc64__, 0 otherwise.  Note this
 * describes the tracer, not the traced process.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
127
128static void
129mark_as_resolved(struct library_symbol *libsym, GElf_Addr value)
130{
131	libsym->arch.type = PPC_PLT_RESOLVED;
132	libsym->arch.resolved_value = value;
133}
134
/* Called once the dynamic linker has finished its startup work (and
 * also when attaching to a running process).  For each delayed symbol
 * we snapshot the current 8 bytes of its PLT slot, decide whether the
 * slot was already resolved, and only then activate the breakpoint.
 * Used on PPC32 with BSS PLT, where the dynamic linker fills .plt at
 * startup.  */
void
arch_dynlink_done(struct process *proc)
{
	/* On PPC32 with BSS PLT, we need to enable delayed symbols.  */
	struct library_symbol *libsym = NULL;
	while ((libsym = proc_each_symbol(proc, libsym,
					  library_symbol_delayed_cb, NULL))) {
		/* Snapshot the two PLT-slot instructions so later
		 * hits can tell whether the slot changed.  */
		if (proc_read_64(proc, libsym->enter_addr,
				 &libsym->arch.resolved_value) < 0) {
			fprintf(stderr,
				"couldn't read PLT value for %s(%p): %s\n",
				libsym->name, libsym->enter_addr,
				strerror(errno));
			return;
		}

		/* arch_dynlink_done is called on attach as well.  In
		 * that case some slots will have been resolved
		 * already.  Unresolved PLT looks like this:
		 *
		 *    <sleep@plt>:	li      r11,0
		 *    <sleep@plt+4>:	b       "resolve"
		 *
		 * "resolve" is another address in PLTGOT (the same
		 * block that all the PLT slots are in).  When
		 * resolved, it looks either this way:
		 *
		 *    <sleep@plt>:	b       0xfea88d0 <sleep>
		 *
		 * Which is easy to detect.  It can also look this
		 * way:
		 *
		 *    <sleep@plt>:	li      r11,0
		 *    <sleep@plt+4>:	b       "dispatch"
		 *
		 * The "dispatch" address lies in PLTGOT as well.  In
		 * current GNU toolchain, "dispatch" address is the
		 * same as PLTGOT address.  We rely on this to figure
		 * out whether the address is resolved or not.  */
		uint32_t insn1 = libsym->arch.resolved_value >> 32;
		uint32_t insn2 = (uint32_t)libsym->arch.resolved_value;
		if ((insn1 & BRANCH_MASK) == B_INSN
		    || ((insn2 & BRANCH_MASK) == B_INSN
			/* XXX double cast  */
			&& (ppc_branch_dest(libsym->enter_addr + 4, insn2)
			    == (void*)(long)libsym->lib->arch.pltgot_addr)))
			mark_as_resolved(libsym, libsym->arch.resolved_value);

		if (proc_activate_delayed_symbol(proc, libsym) < 0)
			return;

		/* XXX double cast  */
		/* On PPC32 the PLT slot address is the entry address
		 * itself; remember it for read_plt_slot_value.  */
		libsym->arch.plt_slot_addr
			= (GElf_Addr)(uintptr_t)libsym->enter_addr;
	}
}
191
192GElf_Addr
193arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
194{
195	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
196		assert(lte->arch.plt_stub_vma != 0);
197		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
198
199	} else if (lte->ehdr.e_machine == EM_PPC) {
200		return rela->r_offset;
201
202	} else {
203		/* If we get here, we don't have stub symbols.  In
204		 * that case we put brakpoints to PLT entries the same
205		 * as the PPC32 secure PLT case does.  */
206		assert(lte->arch.plt_stub_vma != 0);
207		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
208	}
209}
210
211/* This entry point is called when ltelf is not available
212 * anymore--during runtime.  At that point we don't have to concern
213 * ourselves with bias, as the values in OPD have been resolved
214 * already.  */
215int
216arch_translate_address_dyn(struct process *proc,
217			   arch_addr_t addr, arch_addr_t *ret)
218{
219	if (proc->e_machine == EM_PPC64) {
220		uint64_t value;
221		if (proc_read_64(proc, addr, &value) < 0) {
222			fprintf(stderr,
223				"dynamic .opd translation of %p: %s\n",
224				addr, strerror(errno));
225			return -1;
226		}
227		/* XXX The double cast should be removed when
228		 * arch_addr_t becomes integral type.  */
229		*ret = (arch_addr_t)(uintptr_t)value;
230		return 0;
231	}
232
233	*ret = addr;
234	return 0;
235}
236
237int
238arch_translate_address(struct ltelf *lte,
239		       arch_addr_t addr, arch_addr_t *ret)
240{
241	if (lte->ehdr.e_machine == EM_PPC64) {
242		/* XXX The double cast should be removed when
243		 * arch_addr_t becomes integral type.  */
244		GElf_Xword offset
245			= (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base;
246		uint64_t value;
247		if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
248			fprintf(stderr, "static .opd translation of %p: %s\n",
249				addr, elf_errmsg(-1));
250			return -1;
251		}
252		*ret = (arch_addr_t)(uintptr_t)(value + lte->bias);
253		return 0;
254	}
255
256	*ret = addr;
257	return 0;
258}
259
260static int
261load_opd_data(struct ltelf *lte, struct library *lib)
262{
263	Elf_Scn *sec;
264	GElf_Shdr shdr;
265	if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0
266	    || sec == NULL) {
267	fail:
268		fprintf(stderr, "couldn't find .opd data\n");
269		return -1;
270	}
271
272	lte->arch.opd_data = elf_rawdata(sec, NULL);
273	if (lte->arch.opd_data == NULL)
274		goto fail;
275
276	lte->arch.opd_base = shdr.sh_addr + lte->bias;
277	lte->arch.opd_size = shdr.sh_size;
278
279	return 0;
280}
281
282void *
283sym2addr(struct process *proc, struct library_symbol *sym)
284{
285	return sym->enter_addr;
286}
287
/* Find the VMA of the glink trampoline area for a PPC32 secure-PLT
 * binary.  Preferred source is the word at PPCGOT+4 (the second word
 * of the PPCGOT block); if that is absent or zero, fall back to the
 * first word of .plt.  Returns 0 if neither source yields an
 * address.  */
static GElf_Addr
get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
{
	Elf_Scn *ppcgot_sec = NULL;
	GElf_Shdr ppcgot_shdr;
	/* A missing section is diagnosed but not fatal: the .plt
	 * fallback below may still work.  */
	if (ppcgot != 0
	    && (elf_get_section_covering(lte, ppcgot,
					 &ppcgot_sec, &ppcgot_shdr) < 0
		|| ppcgot_sec == NULL))
		fprintf(stderr,
			"DT_PPC_GOT=%#"PRIx64", but no such section found\n",
			ppcgot);

	if (ppcgot_sec != NULL) {
		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
		/* Need at least two 32-bit words in the GOT.  */
		if (data == NULL || data->d_size < 8 ) {
			fprintf(stderr, "couldn't read GOT data\n");
		} else {
			// where PPCGOT begins in .got
			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
			assert(offset % 4 == 0);
			uint32_t glink_vma;
			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
				fprintf(stderr, "couldn't read glink VMA"
					" address at %zd@GOT\n", offset);
				return 0;
			}
			if (glink_vma != 0) {
				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
				      glink_vma);
				return (GElf_Addr)glink_vma;
			}
		}
	}

	/* Fallback: the first word of .plt holds the glink VMA.  */
	if (plt_data != NULL) {
		uint32_t glink_vma;
		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
			fprintf(stderr, "couldn't read glink VMA address\n");
			return 0;
		}
		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
		return (GElf_Addr)glink_vma;
	}

	return 0;
}
335
336static int
337load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
338{
339	Elf_Scn *scn;
340	GElf_Shdr shdr;
341	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
342	    || scn == NULL) {
343	fail:
344		fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
345			elf_errmsg(-1));
346		return -1;
347	}
348
349	Elf_Data *data = elf_loaddata(scn, &shdr);
350	if (data == NULL)
351		goto fail;
352
353	size_t j;
354	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
355		GElf_Dyn dyn;
356		if (gelf_getdyn(data, j, &dyn) == NULL)
357			goto fail;
358
359		if(dyn.d_tag == tag) {
360			*valuep = dyn.d_un.d_ptr;
361			return 0;
362		}
363	}
364
365	return -1;
366}
367
368static int
369nonzero_data(Elf_Data *data)
370{
371	/* We are not supposed to get here if there's no PLT.  */
372	assert(data != NULL);
373
374	unsigned char *buf = data->d_buf;
375	if (buf == NULL)
376		return 0;
377
378	size_t i;
379	for (i = 0; i < data->d_size; ++i)
380		if (buf[i] != 0)
381			return 1;
382	return 0;
383}
384
/* Per-file arch initialization: classify the PLT flavor (PPC32 secure
 * vs. BSS, PPC64), locate the stub area or PLTGOT as appropriate, and
 * on PPC64 harvest any xxxxxxxx.plt_call.<callee> stub symbols into
 * lte->arch.stubs.  Returns 0 on success, -1 on failure.  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
	if (lte->ehdr.e_machine == EM_PPC64
	    && load_opd_data(lte, lib) < 0)
		return -1;

	/* An executable .plt means old-style BSS PLT; otherwise
	 * secure PLT.  */
	lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);

	/* For PPC32 BSS, it is important whether the binary was
	 * prelinked.  If .plt section is NODATA, or if it contains
	 * zeroes, then this library is not prelinked, and we need to
	 * delay breakpoints.  */
	if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt)
		lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data);
	else
		/* For cases where it's irrelevant, initialize the
		 * value to something conspicuous.  */
		lib->arch.bss_plt_prelinked = -1;

	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
		GElf_Addr ppcgot;
		if (load_dynamic_entry(lte, DT_PPC_GOT, &ppcgot) < 0) {
			fprintf(stderr, "couldn't find DT_PPC_GOT\n");
			return -1;
		}
		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);

		/* The PLT stubs sit immediately before glink; count
		 * them from the number of RELA relocations.  */
		assert(lte->relplt_size % 12 == 0);
		size_t count = lte->relplt_size / 12; // size of RELA entry
		lte->arch.plt_stub_vma = glink_vma
			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);

	} else if (lte->ehdr.e_machine == EM_PPC64) {
		GElf_Addr glink_vma;
		if (load_dynamic_entry(lte, DT_PPC64_GLINK, &glink_vma) < 0) {
			fprintf(stderr, "couldn't find DT_PPC64_GLINK\n");
			return -1;
		}

		/* The first glink stub starts at offset 32.  */
		lte->arch.plt_stub_vma = glink_vma + 32;

	} else {
		/* By exhaustion--PPC32 BSS.  */
		if (load_dynamic_entry(lte, DT_PLTGOT,
				       &lib->arch.pltgot_addr) < 0) {
			fprintf(stderr, "couldn't find DT_PLTGOT\n");
			return -1;
		}
	}

	/* On PPC64, look for stub symbols in symbol table.  These are
	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
	if (lte->ehdr.e_machine == EM_PPC64
	    && lte->symtab != NULL && lte->strtab != NULL) {

		/* N.B. We can't simply skip the symbols that we fail
		 * to read or malloc.  There may be more than one stub
		 * per symbol name, and if we failed in one but
		 * succeeded in another, the PLT enabling code would
		 * have no way to tell that something is missing.  We
		 * could work around that, of course, but it doesn't
		 * seem worth the trouble.  So if anything fails, we
		 * just pretend that we don't have stub symbols at
		 * all, as if the binary is stripped.  */

		size_t i;
		for (i = 0; i < lte->symtab_count; ++i) {
			GElf_Sym sym;
			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
				/* NOTE: this inner SYM deliberately
				 * shadows the GElf_Sym above; the
				 * fail path tears down the whole
				 * stub list collected so far.  */
				struct library_symbol *sym, *next;
			fail:
				for (sym = lte->arch.stubs; sym != NULL; ) {
					next = sym->next;
					library_symbol_destroy(sym);
					free(sym);
					sym = next;
				}
				lte->arch.stubs = NULL;
				break;
			}

			const char *name = lte->strtab + sym.st_name;

#define STUBN ".plt_call."
			if ((name = strstr(name, STUBN)) == NULL)
				continue;
			name += sizeof(STUBN) - 1;
#undef STUBN

			/* Strip "@version" or a "+0" addend suffix to
			 * recover the plain callee name.  */
			size_t len;
			const char *ver = strchr(name, '@');
			if (ver != NULL) {
				len = ver - name;

			} else {
				/* If there is "+" at all, check that
				 * the symbol name ends in "+0".  */
				const char *add = strrchr(name, '+');
				if (add != NULL) {
					assert(strcmp(add, "+0") == 0);
					len = add - name;
				} else {
					len = strlen(name);
				}
			}

			char *sym_name = strndup(name, len);
			struct library_symbol *libsym = malloc(sizeof(*libsym));
			if (sym_name == NULL || libsym == NULL) {
			fail2:
				free(sym_name);
				free(libsym);
				goto fail;
			}

			/* XXX The double cast should be removed when
			 * arch_addr_t becomes integral type.  */
			arch_addr_t addr = (arch_addr_t)
				(uintptr_t)sym.st_value + lte->bias;
			if (library_symbol_init(libsym, addr, sym_name, 1,
						LS_TOPLT_EXEC) < 0)
				goto fail2;
			libsym->arch.type = PPC64_PLT_STUB;
			libsym->next = lte->arch.stubs;
			lte->arch.stubs = libsym;
		}
	}

	return 0;
}
518
519static int
520read_plt_slot_value(struct process *proc, GElf_Addr addr, GElf_Addr *valp)
521{
522	/* On PPC64, we read from .plt, which contains 8 byte
523	 * addresses.  On PPC32 we read from .plt, which contains 4
524	 * byte instructions, but the PLT is two instructions, and
525	 * either can change.  */
526	uint64_t l;
527	/* XXX double cast.  */
528	if (proc_read_64(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) {
529		fprintf(stderr, "ptrace .plt slot value @%#" PRIx64": %s\n",
530			addr, strerror(errno));
531		return -1;
532	}
533
534	*valp = (GElf_Addr)l;
535	return 0;
536}
537
538static int
539unresolve_plt_slot(struct process *proc, GElf_Addr addr, GElf_Addr value)
540{
541	/* We only modify plt_entry[0], which holds the resolved
542	 * address of the routine.  We keep the TOC and environment
543	 * pointers intact.  Hence the only adjustment that we need to
544	 * do is to IP.  */
545	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
546		fprintf(stderr, "failed to unresolve .plt slot: %s\n",
547			strerror(errno));
548		return -1;
549	}
550	return 0;
551}
552
553enum plt_status
554arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
555		       const char *a_name, GElf_Rela *rela, size_t ndx,
556		       struct library_symbol **ret)
557{
558	if (lte->ehdr.e_machine == EM_PPC) {
559		if (lte->arch.secure_plt)
560			return PLT_DEFAULT;
561
562		struct library_symbol *libsym = NULL;
563		if (default_elf_add_plt_entry(proc, lte, a_name, rela, ndx,
564					      &libsym) < 0)
565			return PLT_FAIL;
566
567		/* On PPC32 with BSS PLT, delay the symbol until
568		 * dynamic linker is done.  */
569		assert(!libsym->delayed);
570		libsym->delayed = 1;
571
572		*ret = libsym;
573		return PLT_OK;
574	}
575
576	/* PPC64.  If we have stubs, we return a chain of breakpoint
577	 * sites, one for each stub that corresponds to this PLT
578	 * entry.  */
579	struct library_symbol *chain = NULL;
580	struct library_symbol **symp;
581	for (symp = &lte->arch.stubs; *symp != NULL; ) {
582		struct library_symbol *sym = *symp;
583		if (strcmp(sym->name, a_name) != 0) {
584			symp = &(*symp)->next;
585			continue;
586		}
587
588		/* Re-chain the symbol from stubs to CHAIN.  */
589		*symp = sym->next;
590		sym->next = chain;
591		chain = sym;
592	}
593
594	if (chain != NULL) {
595		*ret = chain;
596		return PLT_OK;
597	}
598
599	/* We don't have stub symbols.  Find corresponding .plt slot,
600	 * and check whether it contains the corresponding PLT address
601	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
602	 * want read this from ELF file, but from process image.  That
603	 * makes a difference if we are attaching to a running
604	 * process.  */
605
606	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
607	GElf_Addr plt_slot_addr = rela->r_offset;
608	assert(plt_slot_addr >= lte->plt_addr
609	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
610
611	GElf_Addr plt_slot_value;
612	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
613		return PLT_FAIL;
614
615	char *name = strdup(a_name);
616	struct library_symbol *libsym = malloc(sizeof(*libsym));
617	if (name == NULL || libsym == NULL) {
618		fprintf(stderr, "allocation for .plt slot: %s\n",
619			strerror(errno));
620	fail:
621		free(name);
622		free(libsym);
623		return PLT_FAIL;
624	}
625
626	/* XXX The double cast should be removed when
627	 * arch_addr_t becomes integral type.  */
628	if (library_symbol_init(libsym,
629				(arch_addr_t)(uintptr_t)plt_entry_addr,
630				name, 1, LS_TOPLT_EXEC) < 0)
631		goto fail;
632	libsym->arch.plt_slot_addr = plt_slot_addr;
633
634	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
635		libsym->arch.type = PPC_PLT_UNRESOLVED;
636		libsym->arch.resolved_value = plt_entry_addr;
637
638	} else {
639		/* Unresolve the .plt slot.  If the binary was
640		 * prelinked, this makes the code invalid, because in
641		 * case of prelinked binary, the dynamic linker
642		 * doesn't update .plt[0] and .plt[1] with addresses
643		 * of the resover.  But we don't care, we will never
644		 * need to enter the resolver.  That just means that
645		 * we have to un-un-resolve this back before we
646		 * detach.  */
647
648		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) {
649			library_symbol_destroy(libsym);
650			goto fail;
651		}
652		mark_as_resolved(libsym, plt_slot_value);
653	}
654
655	*ret = libsym;
656	return PLT_OK;
657}
658
659void
660arch_elf_destroy(struct ltelf *lte)
661{
662	struct library_symbol *sym;
663	for (sym = lte->arch.stubs; sym != NULL; ) {
664		struct library_symbol *next = sym->next;
665		library_symbol_destroy(sym);
666		free(sym);
667		sym = next;
668	}
669}
670
/* Hit handler for the breakpoint planted just past the dynamic
 * linker's .plt-updating store (see cb_keep_stepping_p).  When it
 * fires, the slot has just been resolved: read the new value, restore
 * the slot on PPC64, mark the symbol resolved, and turn this
 * breakpoint back off until the next unresolved PLT entry needs
 * it.  */
static void
dl_plt_update_bp_on_hit(struct breakpoint *bp, struct process *proc)
{
	debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)",
	      proc->pid, breakpoint_name(bp), bp->addr);
	/* The stopping handler was stashed here by cb_on_all_stopped
	 * so we can find the PLT symbol being enabled.  */
	struct process_stopping_handler *self = proc->arch.handler;
	assert(self != NULL);

	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
	GElf_Addr value;
	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
		return;

	/* On PPC64, we rewrite the slot value.  */
	if (proc->e_machine == EM_PPC64)
		unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
				   libsym->arch.resolved_value);
	/* We mark the breakpoint as resolved on both arches.  */
	mark_as_resolved(libsym, value);

	/* cb_on_all_stopped looks if HANDLER is set to NULL as a way
	 * to check that this was run.  It's an error if it
	 * wasn't.  */
	proc->arch.handler = NULL;

	breakpoint_turn_off(bp, proc);
}
698
699static void
700cb_on_all_stopped(struct process_stopping_handler *self)
701{
702	/* Put that in for dl_plt_update_bp_on_hit to see.  */
703	assert(self->task_enabling_breakpoint->arch.handler == NULL);
704	self->task_enabling_breakpoint->arch.handler = self;
705
706	linux_ptrace_disable_and_continue(self);
707}
708
/* Single-step predicate used while an unresolved PLT entry runs
 * through the dynamic linker: keep stepping (CBS_CONT) until the .plt
 * slot changes, then migrate the symbol to the resolved state and
 * stop (CBS_STOP).  On PPC64, also install an optimization breakpoint
 * right after the resolver's store so future resolutions need no
 * single-stepping.  Returns CBS_FAIL on read/poke errors.  */
static enum callback_status
cb_keep_stepping_p(struct process_stopping_handler *self)
{
	struct process *proc = self->task_enabling_breakpoint;
	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;

	GElf_Addr value;
	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
		return CBS_FAIL;

	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
	 * the PLT entry value.  */
	if (value == libsym->arch.resolved_value)
		return CBS_CONT;

	debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64,
	      proc->pid, value);

	/* The .plt slot got resolved!  We can migrate the breakpoint
	 * to RESOLVED and stop single-stepping.  */
	if (proc->e_machine == EM_PPC64
	    && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
				  libsym->arch.resolved_value) < 0)
		return CBS_FAIL;

	/* Resolving on PPC64 consists of overwriting a doubleword in
	 * .plt.  That doubleword is then read back by a stub, and
	 * jumped on.  Hopefully we can assume that double word update
	 * is done on a single place only, as it contains a final
	 * address.  We still need to look around for any sync
	 * instruction, but essentially it is safe to optimize away
	 * the single stepping next time and install a post-update
	 * breakpoint.
	 *
	 * The situation on PPC32 BSS is more complicated.  The
	 * dynamic linker here updates potentially several
	 * instructions (XXX currently we assume two) and the rules
	 * are more complicated.  Sometimes it's enough to adjust just
	 * one of the addresses--the logic for generating optimal
	 * dispatch depends on relative addresses of the .plt entry
	 * and the jump destination.  We can't assume that the same
	 * instruction block does the update every time.  So on PPC32,
	 * we turn the optimization off and just step through it each
	 * time.  */
	if (proc->e_machine == EM_PPC)
		goto done;

	/* Install breakpoint to the address where the change takes
	 * place.  If we fail, then that just means that we'll have to
	 * singlestep the next time around as well.  */
	struct process *leader = proc->leader;
	if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
		goto done;

	/* We need to install to the next instruction.  ADDR points to
	 * a store instruction, so moving the breakpoint one
	 * instruction forward is safe.  */
	arch_addr_t addr = get_instruction_pointer(proc) + 4;
	leader->arch.dl_plt_update_bp = insert_breakpoint(proc, addr, NULL);
	if (leader->arch.dl_plt_update_bp == NULL)
		goto done;

	static struct bp_callbacks dl_plt_update_cbs = {
		.on_hit = dl_plt_update_bp_on_hit,
	};
	leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;

	/* Turn it off for now.  We will turn it on again when we hit
	 * the PLT entry that needs this.  */
	breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);

done:
	mark_as_resolved(libsym, value);

	return CBS_STOP;
}
785
786static void
787jump_to_entry_point(struct process *proc, struct breakpoint *bp)
788{
789	/* XXX The double cast should be removed when
790	 * arch_addr_t becomes integral type.  */
791	arch_addr_t rv = (arch_addr_t)
792		(uintptr_t)bp->libsym->arch.resolved_value;
793	set_instruction_pointer(proc, rv);
794}
795
/* Continue-after-hit policy for PPC PLT breakpoints, dispatching on
 * the symbol's PLT state.  Unresolved entries trigger the stopping
 * handler machinery (single-step or post-update breakpoint) described
 * above; resolved PPC64 entries are skipped by jumping straight to
 * the known target.  */
static void
ppc_plt_bp_continue(struct breakpoint *bp, struct process *proc)
{
	switch (bp->libsym->arch.type) {
		/* Declarations hoisted into the switch so both
		 * branches of the fall-through can use them.  */
		struct process *leader;
		void (*on_all_stopped)(struct process_stopping_handler *);
		enum callback_status (*keep_stepping_p)
			(struct process_stopping_handler *);

	case PPC_DEFAULT:
		assert(proc->e_machine == EM_PPC);
		assert(bp->libsym != NULL);
		assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
		/* Fall through.  */

	case PPC_PLT_UNRESOLVED:
		on_all_stopped = NULL;
		keep_stepping_p = NULL;
		leader = proc->leader;

		/* Prefer the fast path: arm the post-update
		 * breakpoint if one was installed earlier; otherwise
		 * fall back to single-stepping the resolver.  */
		if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
		    && breakpoint_turn_on(leader->arch.dl_plt_update_bp,
					  proc) >= 0)
			on_all_stopped = cb_on_all_stopped;
		else
			keep_stepping_p = cb_keep_stepping_p;

		if (process_install_stopping_handler
		    (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
			fprintf(stderr,	"ppc_plt_bp_continue: "
				"couldn't install event handler\n");
			continue_after_breakpoint(proc, bp);
		}
		return;

	case PPC_PLT_RESOLVED:
		if (proc->e_machine == EM_PPC) {
			continue_after_breakpoint(proc, bp);
			return;
		}

		/* PPC64: .plt still holds the unresolved address we
		 * wrote back, so hop directly to the real target.  */
		jump_to_entry_point(proc, bp);
		continue_process(proc->pid);
		return;

	case PPC64_PLT_STUB:
		/* These should never hit here.  */
		break;
	}

	/* Unreachable; the self-inequality trick makes the failed
	 * assertion print the offending value.  */
	assert(bp->libsym->arch.type != bp->libsym->arch.type);
	abort();
}
849
850/* When a process is in a PLT stub, it may have already read the data
851 * in .plt that we changed.  If we detach now, it will jump to PLT
852 * entry and continue to the dynamic linker, where it will SIGSEGV,
853 * because zeroth .plt slot is not filled in prelinked binaries, and
854 * the dynamic linker needs that data.  Moreover, the process may
855 * actually have hit the breakpoint already.  This functions tries to
856 * detect both cases and do any fix-ups necessary to mend this
857 * situation.  */
858static enum callback_status
859detach_task_cb(struct process *task, void *data)
860{
861	struct breakpoint *bp = data;
862
863	if (get_instruction_pointer(task) == bp->addr) {
864		debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
865		      task->pid, bp->addr);
866		jump_to_entry_point(task, bp);
867		return CBS_CONT;
868	}
869
870	/* XXX There's still a window of several instructions where we
871	 * might catch the task inside a stub such that it has already
872	 * read destination address from .plt, but hasn't jumped yet,
873	 * thus avoiding the breakpoint.  */
874
875	return CBS_CONT;
876}
877
878static void
879ppc_plt_bp_retract(struct breakpoint *bp, struct process *proc)
880{
881	/* On PPC64, we rewrite .plt with PLT entry addresses.  This
882	 * needs to be undone.  Unfortunately, the program may have
883	 * made decisions based on that value */
884	if (proc->e_machine == EM_PPC64
885	    && bp->libsym != NULL
886	    && bp->libsym->arch.type == PPC_PLT_RESOLVED) {
887		each_task(proc->leader, NULL, detach_task_cb, bp);
888		unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
889				   bp->libsym->arch.resolved_value);
890	}
891}
892
/* Arch hook: per-library initialization.  Nothing beyond the fields
 * assigned elsewhere (bss_plt_prelinked, pltgot_addr) is needed.  */
int
arch_library_init(struct library *lib)
{
	return 0;
}
898
/* Arch hook: per-library teardown.  No dynamically-owned arch state
 * to release.  */
void
arch_library_destroy(struct library *lib)
{
}
903
/* Arch hook: clone per-library arch state.  Nothing to copy here;
 * callers duplicate the struct contents themselves.  */
int
arch_library_clone(struct library *retp, struct library *lib)
{
	return 0;
}
909
/* Arch hook: initialize per-symbol arch state with a safe default
 * PLT type.  */
int
arch_library_symbol_init(struct library_symbol *libsym)
{
	/* We set type explicitly in the code above, where we have the
	 * necessary context.  This is for calls from ltrace-elf.c and
	 * such.  */
	libsym->arch.type = PPC_DEFAULT;
	return 0;
}
919
/* Arch hook: per-symbol teardown.  The arch struct owns no heap
 * data.  */
void
arch_library_symbol_destroy(struct library_symbol *libsym)
{
}
924
/* Arch hook: clone per-symbol arch state by plain struct copy (type,
 * resolved_value, plt_slot_addr).  */
int
arch_library_symbol_clone(struct library_symbol *retp,
			  struct library_symbol *libsym)
{
	retp->arch = libsym->arch;
	return 0;
}
932
933/* For some symbol types, we need to set up custom callbacks.  XXX we
934 * don't need PROC here, we can store the data in BP if it is of
935 * interest to us.  */
936int
937arch_breakpoint_init(struct process *proc, struct breakpoint *bp)
938{
939	/* Artificial and entry-point breakpoints are plain.  */
940	if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
941		return 0;
942
943	/* On PPC, secure PLT and prelinked BSS PLT are plain.  */
944	if (proc->e_machine == EM_PPC
945	    && bp->libsym->lib->arch.bss_plt_prelinked != 0)
946		return 0;
947
948	/* On PPC64, stub PLT breakpoints are plain.  */
949	if (proc->e_machine == EM_PPC64
950	    && bp->libsym->arch.type == PPC64_PLT_STUB)
951		return 0;
952
953	static struct bp_callbacks cbs = {
954		.on_continue = ppc_plt_bp_continue,
955		.on_retract = ppc_plt_bp_retract,
956	};
957	breakpoint_set_callbacks(bp, &cbs);
958	return 0;
959}
960
/* Arch hook: per-breakpoint teardown.  No arch-owned resources.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
}
965
/* Arch hook: clone per-breakpoint arch state by plain struct copy.  */
int
arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
{
	retp->arch = sbp->arch;
	return 0;
}
972
/* Arch hook: per-process initialization.  Start with no post-update
 * breakpoint installed and no stopping handler in flight.  */
int
arch_process_init(struct process *proc)
{
	proc->arch.dl_plt_update_bp = NULL;
	proc->arch.handler = NULL;
	return 0;
}
980
/* Arch hook: per-process teardown.  dl_plt_update_bp is owned by the
 * breakpoint machinery, so nothing to free here.  */
void
arch_process_destroy(struct process *proc)
{
}
985
/* Arch hook: clone per-process arch state by plain struct copy.
 * NOTE(review): this shares the dl_plt_update_bp pointer with the
 * original process — presumably intentional for threads of one
 * leader; verify for fork.  */
int
arch_process_clone(struct process *retp, struct process *proc)
{
	retp->arch = proc->arch;
	return 0;
}
992
/* Arch hook: after exec the old image is gone, so reset the arch
 * state exactly as for a fresh process.  */
int
arch_process_exec(struct process *proc)
{
	return arch_process_init(proc);
}
998