plt.c revision 52dbfb161efeab85bddc880966db2f7af9b9cf9a
1#include <gelf.h>
2#include <sys/ptrace.h>
3#include <errno.h>
4#include <error.h>
5#include <inttypes.h>
6#include <assert.h>
7#include <string.h>
8
9#include "proc.h"
10#include "common.h"
11#include "library.h"
12#include "breakpoint.h"
13#include "linux-gnu/trace.h"
14
15/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
16 * new-style "secure" PLT.  We can tell one from the other by the
17 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
18 * otherwise it's secure.
19 *
 * BSS PLT works the same way as on most architectures: the .plt
 * section contains trampolines and we put breakpoints on those.  With
 * secure PLT, the .plt section doesn't contain instructions but
 * addresses.  The real PLT table is stored in .text.  Addresses of
 * those PLT entries can be computed, and in fact that's what the
 * glink deal below does.
26 *
27 * If not prelinked, BSS PLT entries in the .plt section contain
28 * zeroes that are overwritten by the dynamic linker during start-up.
29 * For that reason, ltrace realizes those breakpoints only after
30 * .start is hit.
31 *
32 * 64-bit PPC is more involved.  Program linker creates for each
33 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
34 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
35 * dispatch: it loads an address of a function to call from the
36 * section .plt, and branches.  PLT entries themselves are essentially
37 * a curried call to the resolver.  When the symbol is resolved, the
38 * resolver updates the value stored in .plt, and the next time
39 * around, the stub calls the library function directly.  So we make
40 * at most one trip (none if the binary is prelinked) through each PLT
41 * entry, and correspondingly that is useless as a breakpoint site.
42 *
43 * Note the three confusing terms: stubs (that play the role of PLT
44 * entries), PLT entries, .plt section.
45 *
46 * We first check symbol tables and see if we happen to have stub
47 * symbols available.  If yes we just put breakpoints to those, and
48 * treat them as usual breakpoints.  The only tricky part is realizing
49 * that there can be more than one breakpoint per symbol.
50 *
51 * The case that we don't have the stub symbols available is harder.
52 * The following scheme uses two kinds of PLT breakpoints: unresolved
53 * and resolved (to some address).  When the process starts (or when
54 * we attach), we distribute unresolved PLT breakpoints to the PLT
55 * entries (not stubs).  Then we look in .plt, and for each entry
56 * whose value is different than the corresponding PLT entry address,
57 * we assume it was already resolved, and convert the breakpoint to
58 * resolved.  We also rewrite the resolved value in .plt back to the
59 * PLT address.
60 *
61 * When a PLT entry hits a resolved breakpoint (which happens because
62 * we put back the unresolved addresses to .plt), we move the
63 * instruction pointer to the corresponding address and continue the
64 * process as if nothing happened.
65 *
 * When an unresolved PLT entry is called for the first time, we need
 * to catch the new value that the resolver will write to a .plt slot.
 * We also need to prevent another thread from racing through and
 * taking the branch without ltrace noticing.  So when an unresolved
 * PLT entry hits, we have to stop all threads.  We then single-step
 * through the resolver, until the .plt slot changes.  When it does,
 * we treat it the same way as above: convert the PLT breakpoint to
 * resolved, and rewrite the .plt value back to the PLT address.  We
 * then start all threads again.
75 *
76 * In theory we might find the exact instruction that will update the
77 * .plt slot, and emulate it, updating the PLT breakpoint immediately,
78 * and then just skip it.  But that's even messier than the thread
79 * stopping business and single stepping that needs to be done.
80 *
81 * Short of doing this we really have to stop everyone.  There is no
82 * way around that.  Unless we know where the stubs are, we don't have
83 * a way to catch a thread that would use the window of opportunity
84 * between updating .plt and notifying ltrace about the singlestep.
85 */
86
/* Distance between two consecutive PLT stubs; multiplied by the PLT
 * index and added to the stub base address when computing where a
 * given entry lives.  XXX the PPC64 value carried an "xxx" marker in
 * the original source, i.e. it was flagged as needing review.  */
enum {
	PPC_PLT_STUB_SIZE = 16,
	PPC64_PLT_STUB_SIZE = 8,
};
89
/* Tell whether this translation unit was compiled for a 64-bit
 * PowerPC host.
 *
 * Returns 1 when the compiler defines __powerpc64__, 0 otherwise.
 * The parameter list is spelled (void) so that the declaration is a
 * real prototype and stray arguments are diagnosed.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
99
100GElf_Addr
101arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
102{
103	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
104		assert(lte->arch.plt_stub_vma != 0);
105		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
106
107	} else if (lte->ehdr.e_machine == EM_PPC) {
108		return rela->r_offset;
109
110	} else {
111		/* If we get here, we don't have stub symbols.  In
112		 * that case we put brakpoints to PLT entries the same
113		 * as the PPC32 secure PLT case does.  */
114		assert(lte->arch.plt_stub_vma != 0);
115		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
116	}
117}
118
119int
120arch_translate_address(struct Process *proc,
121		       target_address_t addr, target_address_t *ret)
122{
123	if (proc->e_machine == EM_PPC64) {
124		assert(host_powerpc64());
125		long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
126		fprintf(stderr, "arch_translate_address %p->%#lx\n",
127			addr, l);
128		if (l == -1 && errno) {
129			error(0, errno, ".opd translation of %p", addr);
130			return -1;
131		}
132		*ret = (target_address_t)l;
133		return 0;
134	}
135
136	*ret = addr;
137	return 0;
138}
139
140/* XXX Apparently PPC64 doesn't support PLT breakpoints.  */
141void *
142sym2addr(Process *proc, struct library_symbol *sym) {
143	void *addr = sym->enter_addr;
144	long pt_ret;
145
146	debug(3, 0);
147
148	if (sym->plt_type != LS_TOPLT_POINT) {
149		return addr;
150	}
151
152	if (proc->pid == 0) {
153		return 0;
154	}
155
156	if (options.debug >= 3) {
157		xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0),
158			 sizeof(void*)*8);
159	}
160
161	// On a PowerPC-64 system, a plt is three 64-bit words: the first is the
162	// 64-bit address of the routine.  Before the PLT has been initialized,
163	// this will be 0x0. In fact, the symbol table won't have the plt's
164	// address even.  Ater the PLT has been initialized, but before it has
165	// been resolved, the first word will be the address of the function in
166	// the dynamic linker that will reslove the PLT.  After the PLT is
167	// resolved, this will will be the address of the routine whose symbol
168	// is in the symbol table.
169
170	// On a PowerPC-32 system, there are two types of PLTs: secure (new) and
171	// non-secure (old).  For the secure case, the PLT is simply a pointer
172	// and we can treat it much as we do for the PowerPC-64 case.  For the
173	// non-secure case, the PLT is executable code and we can put the
174	// break-point right in the PLT.
175
176	pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
177
178#if SIZEOF_LONG == 8
179	if (proc->mask_32bit) {
180		// Assume big-endian.
181		addr = (void *)((pt_ret >> 32) & 0xffffffff);
182	} else {
183		addr = (void *)pt_ret;
184	}
185#else
186	/* XXX Um, so where exactly are we dealing with the non-secure
187	   PLT thing?  */
188	addr = (void *)pt_ret;
189#endif
190
191	return addr;
192}
193
194static GElf_Addr
195get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
196{
197	Elf_Scn *ppcgot_sec = NULL;
198	GElf_Shdr ppcgot_shdr;
199	if (ppcgot != 0
200	    && elf_get_section_covering(lte, ppcgot,
201					&ppcgot_sec, &ppcgot_shdr) < 0)
202		// xxx should be the log out
203		fprintf(stderr,
204			"DT_PPC_GOT=%#" PRIx64 ", but no such section found.\n",
205			ppcgot);
206
207	if (ppcgot_sec != NULL) {
208		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
209		if (data == NULL || data->d_size < 8 ) {
210			fprintf(stderr, "Couldn't read GOT data.\n");
211		} else {
212			// where PPCGOT begins in .got
213			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
214			assert(offset % 4 == 0);
215			uint32_t glink_vma;
216			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
217				fprintf(stderr,
218					"Couldn't read glink VMA address"
219					" at %zd@GOT\n", offset);
220				return 0;
221			}
222			if (glink_vma != 0) {
223				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
224				      glink_vma);
225				fprintf(stderr, "PPC GOT glink_vma "
226					"address: %#"PRIx32"\n", glink_vma);
227				return (GElf_Addr)glink_vma;
228			}
229		}
230	}
231
232	if (plt_data != NULL) {
233		uint32_t glink_vma;
234		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
235			fprintf(stderr,
236				"Couldn't read glink VMA address at 0@.plt\n");
237			return 0;
238		}
239		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
240		fprintf(stderr, ".plt glink_vma address: "
241			"%#"PRIx32"\n", glink_vma);
242		return (GElf_Addr)glink_vma;
243	}
244
245	return 0;
246}
247
248static int
249load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
250{
251	Elf_Scn *scn;
252	GElf_Shdr shdr;
253	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
254	    || scn == NULL) {
255	fail:
256		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
257		      elf_errmsg(-1));
258		return -1;
259	}
260
261	Elf_Data *data = elf_loaddata(scn, &shdr);
262	if (data == NULL)
263		goto fail;
264
265	size_t j;
266	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
267		GElf_Dyn dyn;
268		if (gelf_getdyn(data, j, &dyn) == NULL)
269			goto fail;
270
271		if(dyn.d_tag == tag) {
272			*valuep = dyn.d_un.d_ptr;
273			return 0;
274		}
275	}
276
277	return -1;
278}
279
280static int
281load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
282{
283	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
284}
285
286static int
287load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp)
288{
289	return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp);
290}
291
292int
293arch_elf_init(struct ltelf *lte)
294{
295	lte->arch.secure_plt = !(lte->lte_flags & LTE_PLT_EXECUTABLE);
296	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
297		GElf_Addr ppcgot;
298		if (load_ppcgot(lte, &ppcgot) < 0) {
299			fprintf(stderr, "Couldn't find DT_PPC_GOT.\n");
300			return -1;
301		}
302		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
303
304		assert (lte->relplt_size % 12 == 0);
305		size_t count = lte->relplt_size / 12; // size of RELA entry
306		lte->arch.plt_stub_vma = glink_vma
307			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
308		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
309
310	} else if (lte->ehdr.e_machine == EM_PPC64) {
311		GElf_Addr glink_vma;
312		if (load_ppc64_glink(lte, &glink_vma) < 0) {
313			fprintf(stderr, "Couldn't find DT_PPC64_GLINK.\n");
314			return -1;
315		}
316
317		/* The first glink stub starts at offset 32.  */
318		lte->arch.plt_stub_vma = glink_vma + 32;
319	}
320
321	/* Override the value that we gleaned from flags on the .plt
322	 * section.  The PLT entries are in fact executable, they are
323	 * just not in .plt.  */
324	lte->lte_flags |= LTE_PLT_EXECUTABLE;
325
326	/* On PPC64, look for stub symbols in symbol table.  These are
327	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
328	if (lte->ehdr.e_machine == EM_PPC64
329	    && lte->symtab != NULL && lte->strtab != NULL) {
330
331		/* N.B. We can't simply skip the symbols that we fail
332		 * to read or malloc.  There may be more than one stub
333		 * per symbol name, and if we failed in one but
334		 * succeeded in another, the PLT enabling code would
335		 * have no way to tell that something is missing.  We
336		 * could work around that, of course, but it doesn't
337		 * seem worth the trouble.  So if anything fails, we
338		 * just pretend that we don't have stub symbols at
339		 * all, as if the binary is stripped.  */
340
341		size_t i;
342		for (i = 0; i < lte->symtab_count; ++i) {
343			GElf_Sym sym;
344			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
345				struct library_symbol *sym, *next;
346			fail:
347				for (sym = lte->arch.stubs; sym != NULL; ) {
348					next = sym->next;
349					library_symbol_destroy(sym);
350					free(sym);
351					sym = next;
352				}
353				lte->arch.stubs = NULL;
354				break;
355			}
356
357			const char *name = lte->strtab + sym.st_name;
358
359#define STUBN ".plt_call."
360			if ((name = strstr(name, STUBN)) == NULL)
361				continue;
362			name += sizeof(STUBN) - 1;
363#undef STUBN
364
365			size_t len;
366			const char *ver = strchr(name, '@');
367			if (ver != NULL) {
368				len = ver - name;
369
370			} else {
371				/* If there is "+" at all, check that
372				 * the symbol name ends in "+0".  */
373				const char *add = strrchr(name, '+');
374				if (add != NULL) {
375					assert(strcmp(add, "+0") == 0);
376					len = add - name;
377				} else {
378					len = strlen(name);
379				}
380			}
381
382			char *sym_name = strndup(name, len);
383			struct library_symbol *libsym = malloc(sizeof(*libsym));
384			if (sym_name == NULL || libsym == NULL) {
385				free(sym_name);
386				free(libsym);
387				goto fail;
388			}
389
390			target_address_t addr
391				= (target_address_t)sym.st_value + lte->bias;
392			library_symbol_init(libsym, addr, sym_name, 1,
393					    LS_TOPLT_EXEC);
394			libsym->arch.type = PPC64PLT_STUB;
395			libsym->next = lte->arch.stubs;
396			lte->arch.stubs = libsym;
397		}
398	}
399
400	return 0;
401}
402
403static int
404read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp)
405{
406	/* on PPC32 we need to do things differently, but PPC64/PPC32
407	 * is currently not supported anyway.  */
408	assert(host_powerpc64());
409
410	long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
411	if (l == -1 && errno != 0) {
412		error(0, errno, "ptrace .plt slot value @%#" PRIx64, addr);
413		return -1;
414	}
415
416	*valp = (GElf_Addr)l;
417	return 0;
418}
419
420static int
421unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value)
422{
423	/* We only modify plt_entry[0], which holds the resolved
424	 * address of the routine.  We keep the TOC and environment
425	 * pointers intact.  Hence the only adjustment that we need to
426	 * do is to IP.  */
427	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
428		error(0, errno, "unresolve .plt slot");
429		return -1;
430	}
431	return 0;
432}
433
434enum plt_status
435arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
436		       const char *a_name, GElf_Rela *rela, size_t ndx,
437		       struct library_symbol **ret)
438{
439	if (lte->ehdr.e_machine == EM_PPC)
440		return plt_default;
441
442	/* PPC64.  If we have stubs, we return a chain of breakpoint
443	 * sites, one for each stub that corresponds to this PLT
444	 * entry.  */
445	struct library_symbol *chain = NULL;
446	struct library_symbol **symp;
447	for (symp = &lte->arch.stubs; *symp != NULL; ) {
448		struct library_symbol *sym = *symp;
449		if (strcmp(sym->name, a_name) != 0) {
450			symp = &(*symp)->next;
451			continue;
452		}
453
454		/* Re-chain the symbol from stubs to CHAIN.  */
455		*symp = sym->next;
456		sym->next = chain;
457		chain = sym;
458	}
459
460	if (chain != NULL) {
461		struct library_symbol *sym;
462		for (sym = chain; sym != NULL; sym = sym->next)
463			fprintf(stderr, "match %s --> %p\n",
464				sym->name, sym->enter_addr);
465		for (sym = lte->arch.stubs; sym != NULL; sym = sym->next)
466			fprintf(stderr, "remains %s --> %p\n",
467				sym->name, sym->enter_addr);
468
469		*ret = chain;
470		return plt_ok;
471	}
472
473	/* We don't have stub symbols.  Find corresponding .plt slot,
474	 * and check whether it contains the corresponding PLT address
475	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
476	 * want read this from ELF file, but from process image.  That
477	 * makes a difference if we are attaching to a running
478	 * process.  */
479
480	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
481	GElf_Addr plt_slot_addr = rela->r_offset;
482	assert(plt_slot_addr >= lte->plt_addr
483	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
484
485	GElf_Addr plt_slot_value;
486	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
487		return plt_fail;
488
489	char *name = strdup(a_name);
490	struct library_symbol *libsym = malloc(sizeof(*libsym));
491	if (name == NULL || libsym == NULL) {
492		error(0, errno, "allocation for .plt slot");
493	fail:
494		free(name);
495		free(libsym);
496		return plt_fail;
497	}
498
499	library_symbol_init(libsym, (target_address_t)plt_entry_addr,
500			    name, 1, LS_TOPLT_EXEC);
501	libsym->arch.plt_slot_addr = plt_slot_addr;
502
503	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
504		libsym->arch.type = PPC64PLT_UNRESOLVED;
505		libsym->arch.resolved_value = plt_entry_addr;
506
507	} else {
508		/* Unresolve the .plt slot.  If the binary was
509		 * prelinked, this makes the code invalid, because in
510		 * case of prelinked binary, the dynamic linker
511		 * doesn't update .plt[0] and .plt[1] with addresses
512		 * of the resover.  But we don't care, we will never
513		 * need to enter the resolver.  That just means that
514		 * we have to un-un-resolve this back before we
515		 * detach, which is nothing new: we already need to
516		 * retract breakpoints.  */
517
518		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0)
519			goto fail;
520		libsym->arch.type = PPC64PLT_RESOLVED;
521		libsym->arch.resolved_value = plt_slot_value;
522	}
523
524	*ret = libsym;
525	return plt_ok;
526}
527
528void
529arch_elf_destroy(struct ltelf *lte)
530{
531	struct library_symbol *sym;
532	for (sym = lte->arch.stubs; sym != NULL; ) {
533		struct library_symbol *next = sym->next;
534		library_symbol_destroy(sym);
535		free(sym);
536		sym = next;
537	}
538}
539
540static enum callback_status
541keep_stepping_p(struct process_stopping_handler *self)
542{
543	struct Process *proc = self->task_enabling_breakpoint;
544	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
545	GElf_Addr value;
546	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
547		return CBS_FAIL;
548
549	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
550	 * the PLT entry value.  */
551	if (value == libsym->arch.resolved_value)
552		return CBS_CONT;
553
554	/* The .plt slot got resolved!  We can migrate the breakpoint
555	 * to RESOLVED and stop single-stepping.  */
556	if (unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
557			       libsym->arch.resolved_value) < 0)
558		return CBS_FAIL;
559	libsym->arch.type = PPC64PLT_RESOLVED;
560	libsym->arch.resolved_value = value;
561
562	return CBS_STOP;
563}
564
565static enum callback_status
566yes(struct process_stopping_handler *self)
567{
568	return CBS_CONT;
569}
570
571static void
572ppc64_plt_bp_continue(struct breakpoint *bp, struct Process *proc)
573{
574	fprintf(stderr, "ppc64_plt_bp_continue\n");
575
576	switch (bp->libsym->arch.type) {
577		target_address_t rv;
578
579	case PPC64PLT_STUB:
580		/* We should never get here.  */
581		abort();
582
583	case PPC64PLT_UNRESOLVED:
584		if (process_install_stopping_handler(proc, bp, NULL,
585						     &keep_stepping_p,
586						     &yes) < 0) {
587			perror("ppc64_unresolved_bp_continue: couldn't install"
588			       " event handler");
589			continue_after_breakpoint(proc, bp);
590		}
591		return;
592
593	case PPC64PLT_RESOLVED:
594		fprintf(stderr, "ppc64_resolved_bp_continue\n");
595		rv = (target_address_t)bp->libsym->arch.resolved_value;
596		set_instruction_pointer(proc, rv);
597		continue_process(proc->pid);
598	}
599}
600
601/* For some symbol types, we need to set up custom callbacks.  XXX we
602 * don't need PROC here, we can store the data in BP if it is of
603 * interest to us.  */
604int
605arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
606{
607	if (proc->e_machine == EM_PPC
608	    || bp->libsym == NULL
609	    || bp->libsym->arch.type == PPC64PLT_STUB)
610		return 0;
611
612	static struct bp_callbacks cbs = {
613		.on_continue = ppc64_plt_bp_continue,
614	};
615	breakpoint_set_callbacks(bp, &cbs);
616	return 0;
617}
618
/* Architecture hook run when breakpoint BP is destroyed.  This
 * backend has nothing to release, so the hook does nothing.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
	(void)bp;
}
623