plt.c revision e8d9076a97f6617868466a99bd18e11e3f6389ac
#include <gelf.h>
#include <sys/ptrace.h>
#include <errno.h>
#include <error.h>
#include <inttypes.h>
#include <assert.h>
#include <string.h>

#include "proc.h"
#include "common.h"
#include "library.h"
#include "breakpoint.h"
#include "linux-gnu/trace.h"

/* There are two PLT types on 32-bit PPC: the old-style BSS PLT, and
 * the new-style "secure" PLT.  We can tell one from the other by the
 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
 * otherwise it's secure (a stand-alone sketch of this check follows
 * this comment).
 *
 * BSS PLT works the same way as on most architectures: the .plt
 * section contains trampolines and we put breakpoints to those.  With
 * secure PLT, the .plt section doesn't contain instructions but
 * addresses.  The real PLT table is stored in .text.  Addresses of
 * those PLT entries can be computed, and in fact that's what the
 * glink handling below does.
 *
 * If not prelinked, BSS PLT entries in the .plt section contain
 * zeroes that are overwritten by the dynamic linker during start-up.
 * For that reason, ltrace realizes those breakpoints only after
 * .start is hit.
 *
 * 64-bit PPC is more involved.  The program linker creates for each
 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
 * dispatch: it loads the address of the function to call from the
 * .plt section, and branches there.  PLT entries themselves are
 * essentially a curried call to the resolver.  When the symbol is
 * resolved, the resolver updates the value stored in .plt, and the
 * next time around, the stub calls the library function directly.  So
 * we make at most one trip (none if the binary is prelinked) through
 * each PLT entry, which makes the entry itself useless as a
 * breakpoint site.
 *
 * Note the three confusing terms: stubs (that play the role of PLT
 * entries), PLT entries, .plt section.
 *
 * We first check symbol tables and see if we happen to have stub
 * symbols available.  If yes, we just put breakpoints to those, and
 * treat them as usual breakpoints.  The only tricky part is realizing
 * that there can be more than one breakpoint per symbol.
 *
 * The case that we don't have the stub symbols available is harder.
 * The following scheme uses two kinds of PLT breakpoints: unresolved
 * and resolved (to some address).  When the process starts (or when
 * we attach), we distribute unresolved PLT breakpoints to the PLT
 * entries (not stubs).  Then we look in .plt, and for each entry
 * whose value is different from the corresponding PLT entry address,
 * we assume it was already resolved, and convert the breakpoint to
 * resolved.  We also rewrite the resolved value in .plt back to the
 * PLT address.
 *
 * When a PLT entry hits a resolved breakpoint (which happens because
 * we put back the unresolved addresses to .plt), we move the
 * instruction pointer to the corresponding address and continue the
 * process as if nothing happened.
 *
 * When an unresolved PLT entry is called for the first time, we need
 * to catch the new value that the resolver will write to the .plt
 * slot.  We also need to prevent another thread from racing through
 * and taking the branch without ltrace noticing.  So when an
 * unresolved PLT entry hits, we have to stop all threads.  We then
 * single-step through the resolver, until the .plt slot changes.
 * When it does, we treat it the same way as above: convert the PLT
 * breakpoint to resolved, and rewrite the .plt value back to the PLT
 * address.  We then start all threads again.
 *
 * In theory we might find the exact instruction that will update the
 * .plt slot, emulate it, update the PLT breakpoint immediately, and
 * then just skip it.  But that's even messier than the thread
 * stopping business and the single stepping that needs to be done.
 *
 * Short of doing this we really have to stop everyone.  There is no
 * way around that.  Unless we know where the stubs are, we don't have
 * a way to catch a thread that would use the window of opportunity
 * between updating .plt and notifying ltrace about the singlestep.
 */

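/* Illustrative sketch only (kept out of the build): one way to do
 * the .plt flag check described above with plain libelf.  ltrace
 * itself gets the flags via lte->plt_flags; the Elf handle here is
 * assumed to be already opened by the caller.  */
#if 0
static int
plt_is_bss_style(Elf *elf)
{
	size_t shstrndx;
	if (elf_getshdrstrndx(elf, &shstrndx) < 0)
		return -1;

	Elf_Scn *scn = NULL;
	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		GElf_Shdr shdr;
		if (gelf_getshdr(scn, &shdr) == NULL)
			return -1;
		const char *name = elf_strptr(elf, shstrndx, shdr.sh_name);
		if (name != NULL && strcmp(name, ".plt") == 0)
			/* +X means BSS PLT, otherwise secure PLT.  */
			return (shdr.sh_flags & SHF_EXECINSTR) ? 1 : 0;
	}
	return -1;	/* No .plt section at all.  */
}
#endif
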
#define PPC_PLT_STUB_SIZE 16
#define PPC64_PLT_STUB_SIZE 8 //xxx

static inline int
host_powerpc64()
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}

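/* A worked example of the secure-PLT arithmetic below (the numbers
 * are made up for illustration): with 3 PLT entries and glink at
 * 0x10000400, arch_elf_init computes plt_stub_vma = 0x10000400
 * - 3 * PPC_PLT_STUB_SIZE = 0x100003d0, and PLT entry #1 then gets
 * its breakpoint at 0x100003d0 + 1 * 16 = 0x100003e0.  */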
GElf_Addr
arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
{
	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
		assert(lte->arch.plt_stub_vma != 0);
		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;

	} else if (lte->ehdr.e_machine == EM_PPC) {
		return rela->r_offset;

	} else {
		/* If we get here, we don't have stub symbols.  In
		 * that case we put breakpoints to PLT entries the
		 * same as the PPC32 secure PLT case does.  */
		assert(lte->arch.plt_stub_vma != 0);
		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
	}
}

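/* On PPC64, a function symbol's value points to a function
 * descriptor in .opd rather than to code; the descriptor's first
 * doubleword is the actual entry point.  That is why a single
 * PTRACE_PEEKTEXT read is enough to translate the address below.  */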
int
arch_translate_address(struct Process *proc,
		       target_address_t addr, target_address_t *ret)
{
	if (proc->e_machine == EM_PPC64) {
		assert(host_powerpc64());
		long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
		if (l == -1 && errno) {
			error(0, errno, ".opd translation of %p", addr);
			return -1;
		}
		*ret = (target_address_t)l;
		return 0;
	}

	*ret = addr;
	return 0;
}

void *
sym2addr(struct Process *proc, struct library_symbol *sym)
{
	return sym->enter_addr;
}

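/* Find the glink VMA as described in the comment at the top: read
 * the word just past the address that DT_PPC_GOT points to, falling
 * back to the first word of the .plt data.  Returns 0 if neither
 * source yields an address.  */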
static GElf_Addr
get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
{
	Elf_Scn *ppcgot_sec = NULL;
	GElf_Shdr ppcgot_shdr;
	if (ppcgot != 0
	    && elf_get_section_covering(lte, ppcgot,
					&ppcgot_sec, &ppcgot_shdr) < 0)
		error(0, 0, "DT_PPC_GOT=%#"PRIx64", but no such section found",
		      ppcgot);

	if (ppcgot_sec != NULL) {
		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
		if (data == NULL || data->d_size < 8) {
			error(0, 0, "couldn't read GOT data");
		} else {
			// where PPCGOT begins in .got
			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
			assert(offset % 4 == 0);
			uint32_t glink_vma;
			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
				error(0, 0, "couldn't read glink VMA address"
				      " at %zd@GOT", offset);
				return 0;
			}
			if (glink_vma != 0) {
				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
				      glink_vma);
				return (GElf_Addr)glink_vma;
			}
		}
	}

	if (plt_data != NULL) {
		uint32_t glink_vma;
		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
			error(0, 0, "couldn't read glink VMA address");
			return 0;
		}
		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
		return (GElf_Addr)glink_vma;
	}

	return 0;
}

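/* Scan the SHT_DYNAMIC section for TAG and store its d_ptr value in
 * *VALUEP.  Returns 0 on success, -1 if the section can't be read or
 * the tag isn't present.  */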
static int
load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
{
	Elf_Scn *scn;
	GElf_Shdr shdr;
	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
	    || scn == NULL) {
	fail:
		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
		      elf_errmsg(-1));
		return -1;
	}

	Elf_Data *data = elf_loaddata(scn, &shdr);
	if (data == NULL)
		goto fail;

	size_t j;
	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
		GElf_Dyn dyn;
		if (gelf_getdyn(data, j, &dyn) == NULL)
			goto fail;

		if (dyn.d_tag == tag) {
			*valuep = dyn.d_un.d_ptr;
			return 0;
		}
	}

	return -1;
}

static int
load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
{
	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
}

static int
load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp)
{
	return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp);
}

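/* For illustration (the symbol name is hypothetical): a PPC64 stub
 * symbol such as "0000001c.plt_call.memcpy@GLIBC_2.3+0" yields the
 * name "memcpy" once the code below strips everything up to and
 * including ".plt_call.", the "@version" suffix, and any trailing
 * "+0" addend.  */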
int
arch_elf_init(struct ltelf *lte)
{
	lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);
	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
		GElf_Addr ppcgot;
		if (load_ppcgot(lte, &ppcgot) < 0) {
			error(0, 0, "couldn't find DT_PPC_GOT");
			return -1;
		}
		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);

		/* Each PLT relocation entry is an Elf32_Rela, 12 bytes.  */
		assert(lte->relplt_size % 12 == 0);
		size_t count = lte->relplt_size / 12;
		lte->arch.plt_stub_vma = glink_vma
			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);

	} else if (lte->ehdr.e_machine == EM_PPC64) {
		GElf_Addr glink_vma;
		if (load_ppc64_glink(lte, &glink_vma) < 0) {
			error(0, 0, "couldn't find DT_PPC64_GLINK");
			return -1;
		}

		/* The first glink stub starts at offset 32.  */
		lte->arch.plt_stub_vma = glink_vma + 32;
	}

	/* On PPC64, look for stub symbols in symbol table.  These are
	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
	if (lte->ehdr.e_machine == EM_PPC64
	    && lte->symtab != NULL && lte->strtab != NULL) {

		/* N.B. We can't simply skip the symbols that we fail
		 * to read or malloc.  There may be more than one stub
		 * per symbol name, and if we failed in one but
		 * succeeded in another, the PLT enabling code would
		 * have no way to tell that something is missing.  We
		 * could work around that, of course, but it doesn't
		 * seem worth the trouble.  So if anything fails, we
		 * just pretend that we don't have stub symbols at
		 * all, as if the binary is stripped.  */

		size_t i;
		for (i = 0; i < lte->symtab_count; ++i) {
			GElf_Sym sym;
			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
				struct library_symbol *sym, *next;
			fail:
				for (sym = lte->arch.stubs; sym != NULL; ) {
					next = sym->next;
					library_symbol_destroy(sym);
					free(sym);
					sym = next;
				}
				lte->arch.stubs = NULL;
				break;
			}

			const char *name = lte->strtab + sym.st_name;

#define STUBN ".plt_call."
			if ((name = strstr(name, STUBN)) == NULL)
				continue;
			name += sizeof(STUBN) - 1;
#undef STUBN

			size_t len;
			const char *ver = strchr(name, '@');
			if (ver != NULL) {
				len = ver - name;

			} else {
				/* If there is "+" at all, check that
				 * the symbol name ends in "+0".  */
				const char *add = strrchr(name, '+');
				if (add != NULL) {
					assert(strcmp(add, "+0") == 0);
					len = add - name;
				} else {
					len = strlen(name);
				}
			}

			char *sym_name = strndup(name, len);
			struct library_symbol *libsym = malloc(sizeof(*libsym));
			if (sym_name == NULL || libsym == NULL) {
			fail2:
				free(sym_name);
				free(libsym);
				goto fail;
			}

			target_address_t addr
				= (target_address_t)sym.st_value + lte->bias;
			if (library_symbol_init(libsym, addr, sym_name, 1,
						LS_TOPLT_EXEC) < 0)
				goto fail2;
			libsym->arch.type = PPC64PLT_STUB;
			libsym->next = lte->arch.stubs;
			lte->arch.stubs = libsym;
		}
	}

	return 0;
}

static int
read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp)
{
	/* On PPC32 we need to do things differently, but PPC64/PPC32
	 * is currently not supported anyway.  */
	assert(host_powerpc64());

	long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
	if (l == -1 && errno != 0) {
		error(0, errno, "ptrace .plt slot value @%#" PRIx64, addr);
		return -1;
	}

	*valp = (GElf_Addr)l;
	return 0;
}

static int
unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value)
{
	/* We only modify plt_entry[0], which holds the resolved
	 * address of the routine.  We keep the TOC and environment
	 * pointers intact.  Hence the only adjustment that we need to
	 * do is to IP.  */
	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
		error(0, errno, "unresolve .plt slot");
		return -1;
	}
	return 0;
}

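/* Create the arch-specific PLT breakpoint site(s) for symbol A_NAME
 * and relocation NDX.  On PPC32 we fall back to the generic
 * handling; on PPC64 we either hand back the chain of stub symbols
 * collected in arch_elf_init, or synthesize a single UNRESOLVED or
 * RESOLVED PLT symbol from the current .plt slot contents.  */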
enum plt_status
arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
		       const char *a_name, GElf_Rela *rela, size_t ndx,
		       struct library_symbol **ret)
{
	if (lte->ehdr.e_machine == EM_PPC)
		return plt_default;

	/* PPC64.  If we have stubs, we return a chain of breakpoint
	 * sites, one for each stub that corresponds to this PLT
	 * entry.  */
	struct library_symbol *chain = NULL;
	struct library_symbol **symp;
	for (symp = &lte->arch.stubs; *symp != NULL; ) {
		struct library_symbol *sym = *symp;
		if (strcmp(sym->name, a_name) != 0) {
			symp = &(*symp)->next;
			continue;
		}

		/* Re-chain the symbol from stubs to CHAIN.  */
		*symp = sym->next;
		sym->next = chain;
		chain = sym;
	}

	if (chain != NULL) {
		*ret = chain;
		return plt_ok;
	}

	/* We don't have stub symbols.  Find the corresponding .plt
	 * slot, and check whether it contains the corresponding PLT
	 * address (or 0 if the dynamic linker hasn't run yet).
	 * N.B. we don't want to read this from the ELF file, but from
	 * the process image.  That makes a difference if we are
	 * attaching to a running process.  */

	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
	GElf_Addr plt_slot_addr = rela->r_offset;
	assert(plt_slot_addr >= lte->plt_addr
	       && plt_slot_addr < lte->plt_addr + lte->plt_size);

	GElf_Addr plt_slot_value;
	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
		return plt_fail;

	char *name = strdup(a_name);
	struct library_symbol *libsym = malloc(sizeof(*libsym));
	if (name == NULL || libsym == NULL) {
		error(0, errno, "allocation for .plt slot");
	fail:
		free(name);
		free(libsym);
		return plt_fail;
	}

	if (library_symbol_init(libsym, (target_address_t)plt_entry_addr,
				name, 1, LS_TOPLT_EXEC) < 0)
		goto fail;
	libsym->arch.plt_slot_addr = plt_slot_addr;

	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
		libsym->arch.type = PPC64PLT_UNRESOLVED;
		libsym->arch.resolved_value = plt_entry_addr;

	} else {
		/* Unresolve the .plt slot.  If the binary was
		 * prelinked, this makes the code invalid, because for
		 * a prelinked binary, the dynamic linker doesn't
		 * update .plt[0] and .plt[1] with addresses of the
		 * resolver.  But we don't care, we will never need to
		 * enter the resolver.  That just means that we have
		 * to un-un-resolve this back before we detach, which
		 * is nothing new: we already need to retract
		 * breakpoints.  */

		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0)
			goto fail;
		libsym->arch.type = PPC64PLT_RESOLVED;
		libsym->arch.resolved_value = plt_slot_value;
	}

	*ret = libsym;
	return plt_ok;
}

void
arch_elf_destroy(struct ltelf *lte)
{
	struct library_symbol *sym;
	for (sym = lte->arch.stubs; sym != NULL; ) {
		struct library_symbol *next = sym->next;
		library_symbol_destroy(sym);
		free(sym);
		sym = next;
	}
}

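/* Single-step predicate used for unresolved PLT entries: keep
 * stepping while the .plt slot still holds the PLT entry address;
 * once the resolver has written the real target, record it, rewrite
 * the slot back to the PLT entry address, mark the symbol resolved,
 * and stop stepping.  */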
static enum callback_status
keep_stepping_p(struct process_stopping_handler *self)
{
	struct Process *proc = self->task_enabling_breakpoint;
	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
	GElf_Addr value;
	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
		return CBS_FAIL;

	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
	 * the PLT entry value.  */
	if (value == libsym->arch.resolved_value)
		return CBS_CONT;

	/* The .plt slot got resolved!  We can migrate the breakpoint
	 * to RESOLVED and stop single-stepping.  */
	if (unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
			       libsym->arch.resolved_value) < 0)
		return CBS_FAIL;
	libsym->arch.type = PPC64PLT_RESOLVED;
	libsym->arch.resolved_value = value;

	return CBS_STOP;
}

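/* The on_continue callback for PPC64 PLT breakpoints: an unresolved
 * entry installs a process-stopping handler that single-steps through
 * the resolver (see keep_stepping_p above); a resolved entry simply
 * redirects the instruction pointer to the saved target.  Stub-based
 * breakpoints never get this callback installed.  */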
static void
ppc64_plt_bp_continue(struct breakpoint *bp, struct Process *proc)
{
	switch (bp->libsym->arch.type) {
		target_address_t rv;
	case PPC64PLT_UNRESOLVED:
		if (process_install_stopping_handler(proc, bp, NULL,
						     &keep_stepping_p,
						     NULL) < 0) {
			perror("ppc64_unresolved_bp_continue: couldn't install"
			       " event handler");
			continue_after_breakpoint(proc, bp);
		}
		return;

	case PPC64PLT_RESOLVED:
		rv = (target_address_t)bp->libsym->arch.resolved_value;
		set_instruction_pointer(proc, rv);
		continue_process(proc->pid);
		return;

	case PPC64PLT_STUB:
		break;
	}

	assert(bp->libsym->arch.type != bp->libsym->arch.type);
	abort();
}

/* For some symbol types, we need to set up custom callbacks.  XXX we
 * don't need PROC here, we can store the data in BP if it is of
 * interest to us.  */
int
arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
{
	if (proc->e_machine == EM_PPC
	    || bp->libsym == NULL)
		return 0;

	/* We could see LS_TOPLT_EXEC or LS_TOPLT_NONE (the latter
	 * when we trace entry points), but not LS_TOPLT_POINT
	 * anywhere on PPC.  */
	if (bp->libsym->plt_type != LS_TOPLT_EXEC
	    || bp->libsym->arch.type == PPC64PLT_STUB)
		return 0;

	static struct bp_callbacks cbs = {
		.on_continue = ppc64_plt_bp_continue,
	};
	breakpoint_set_callbacks(bp, &cbs);
	return 0;
}

void
arch_breakpoint_destroy(struct breakpoint *bp)
{
}

int
arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
{
	retp->arch = sbp->arch;
	return 0;
}
561