plt.c revision 8b00d5bb6a0925ece06aad0d9df0a85e8dbd7b57
1#include <gelf.h>
2#include <sys/ptrace.h>
3#include <errno.h>
4#include <error.h>
5#include <inttypes.h>
6#include <assert.h>
7#include <string.h>
8
9#include "proc.h"
10#include "common.h"
11#include "library.h"
12#include "breakpoint.h"
13#include "linux-gnu/trace.h"
14
15/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
16 * new-style "secure" PLT.  We can tell one from the other by the
17 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
18 * otherwise it's secure.
19 *
20 * BSS PLT works the same way as most architectures: the .plt section
21 * contains trampolines and we put breakpoints to those.  With secure
22 * PLT, the .plt section doesn't contain instructions but addresses.
23 * The real PLT table is stored in .text.  Addresses of those PLT
24 * entries can be computed, and in fact that's what the glink deal
25 * below does.
26 *
27 * If not prelinked, BSS PLT entries in the .plt section contain
28 * zeroes that are overwritten by the dynamic linker during start-up.
29 * For that reason, ltrace realizes those breakpoints only after
30 * .start is hit.
31 *
32 * 64-bit PPC is more involved.  Program linker creates for each
33 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
34 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
35 * dispatch: it loads an address of a function to call from the
36 * section .plt, and branches.  PLT entries themselves are essentially
37 * a curried call to the resolver.  When the symbol is resolved, the
38 * resolver updates the value stored in .plt, and the next time
39 * around, the stub calls the library function directly.  So we make
40 * at most one trip (none if the binary is prelinked) through each PLT
41 * entry, and correspondingly that is useless as a breakpoint site.
42 *
43 * Note the three confusing terms: stubs (that play the role of PLT
44 * entries), PLT entries, .plt section.
45 *
46 * We first check symbol tables and see if we happen to have stub
47 * symbols available.  If yes we just put breakpoints to those, and
48 * treat them as usual breakpoints.  The only tricky part is realizing
49 * that there can be more than one breakpoint per symbol.
50 *
51 * The case that we don't have the stub symbols available is harder.
52 * The following scheme uses two kinds of PLT breakpoints: unresolved
53 * and resolved (to some address).  When the process starts (or when
54 * we attach), we distribute unresolved PLT breakpoints to the PLT
55 * entries (not stubs).  Then we look in .plt, and for each entry
56 * whose value is different than the corresponding PLT entry address,
57 * we assume it was already resolved, and convert the breakpoint to
58 * resolved.  We also rewrite the resolved value in .plt back to the
59 * PLT address.
60 *
61 * When a PLT entry hits a resolved breakpoint (which happens because
62 * we put back the unresolved addresses to .plt), we move the
63 * instruction pointer to the corresponding address and continue the
64 * process as if nothing happened.
65 *
66 * When unresolved PLT entry is called for the first time, we need to
67 * catch the new value that the resolver will write to a .plt slot.
68 * We also need to prevent another thread from racing through and
69 * taking the branch without ltrace noticing.  So when unresolved PLT
70 * entry hits, we have to stop all threads.  We then single-step
71 * through the resolver, until the .plt slot changes.  When it does,
72 * we treat it the same way as above: convert the PLT breakpoint to
73 * resolved, and rewrite the .plt value back to PLT address.  We then
74 * start all threads again.
75 *
76 * In theory we might find the exact instruction that will update the
77 * .plt slot, and emulate it, updating the PLT breakpoint immediately,
78 * and then just skip it.  But that's even messier than the thread
79 * stopping business and single stepping that needs to be done.
80 *
81 * Short of doing this we really have to stop everyone.  There is no
82 * way around that.  Unless we know where the stubs are, we don't have
83 * a way to catch a thread that would use the window of opportunity
84 * between updating .plt and notifying ltrace about the singlestep.
85 */
86
/* Byte distance between consecutive PLT stubs.  arch_plt_sym_val
 * multiplies it by the PLT index to locate the N-th stub above
 * plt_stub_vma; the first constant serves the 32-bit secure-PLT case,
 * the second the 64-bit case.  The trailing marker on the second one
 * is kept from the original -- NOTE(review): it looks like the value
 * was considered provisional; confirm.  */
87#define PPC_PLT_STUB_SIZE 16
88#define PPC64_PLT_STUB_SIZE 8 //xxx
89
/* Tell whether this build of the tracer targets a 64-bit PowerPC host.
 *
 * Returns 1 when the compiler defines __powerpc64__ and 0 otherwise;
 * the answer is decided at compile time.  The parameter list is spelled
 * (void) so that the declaration is a real prototype.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
99
100GElf_Addr
101arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
102{
103	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
104		assert(lte->arch.plt_stub_vma != 0);
105		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
106
107	} else if (lte->ehdr.e_machine == EM_PPC) {
108		return rela->r_offset;
109
110	} else {
111		/* If we get here, we don't have stub symbols.  In
112		 * that case we put brakpoints to PLT entries the same
113		 * as the PPC32 secure PLT case does.  */
114		assert(lte->arch.plt_stub_vma != 0);
115		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
116	}
117}
118
119int
120arch_translate_address(struct Process *proc,
121		       target_address_t addr, target_address_t *ret)
122{
123	if (proc->e_machine == EM_PPC64) {
124		assert(host_powerpc64());
125		long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
126		if (l == -1 && errno) {
127			error(0, errno, ".opd translation of %p", addr);
128			return -1;
129		}
130		*ret = (target_address_t)l;
131		return 0;
132	}
133
134	*ret = addr;
135	return 0;
136}
137
138/* XXX Apparently PPC64 doesn't support PLT breakpoints.  */
139void *
140sym2addr(Process *proc, struct library_symbol *sym) {
141	void *addr = sym->enter_addr;
142	long pt_ret;
143
144	debug(3, 0);
145
146	if (sym->plt_type != LS_TOPLT_POINT) {
147		return addr;
148	}
149
150	if (proc->pid == 0) {
151		return 0;
152	}
153
154	if (options.debug >= 3) {
155		xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0),
156			 sizeof(void*)*8);
157	}
158
159	// On a PowerPC-64 system, a plt is three 64-bit words: the first is the
160	// 64-bit address of the routine.  Before the PLT has been initialized,
161	// this will be 0x0. In fact, the symbol table won't have the plt's
162	// address even.  Ater the PLT has been initialized, but before it has
163	// been resolved, the first word will be the address of the function in
164	// the dynamic linker that will reslove the PLT.  After the PLT is
165	// resolved, this will will be the address of the routine whose symbol
166	// is in the symbol table.
167
168	// On a PowerPC-32 system, there are two types of PLTs: secure (new) and
169	// non-secure (old).  For the secure case, the PLT is simply a pointer
170	// and we can treat it much as we do for the PowerPC-64 case.  For the
171	// non-secure case, the PLT is executable code and we can put the
172	// break-point right in the PLT.
173
174	pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
175
176#if SIZEOF_LONG == 8
177	if (proc->mask_32bit) {
178		// Assume big-endian.
179		addr = (void *)((pt_ret >> 32) & 0xffffffff);
180	} else {
181		addr = (void *)pt_ret;
182	}
183#else
184	/* XXX Um, so where exactly are we dealing with the non-secure
185	   PLT thing?  */
186	addr = (void *)pt_ret;
187#endif
188
189	return addr;
190}
191
192static GElf_Addr
193get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
194{
195	Elf_Scn *ppcgot_sec = NULL;
196	GElf_Shdr ppcgot_shdr;
197	if (ppcgot != 0
198	    && elf_get_section_covering(lte, ppcgot,
199					&ppcgot_sec, &ppcgot_shdr) < 0)
200		error(0, 0, "DT_PPC_GOT=%#"PRIx64", but no such section found",
201		      ppcgot);
202
203	if (ppcgot_sec != NULL) {
204		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
205		if (data == NULL || data->d_size < 8 ) {
206			error(0, 0, "couldn't read GOT data");
207		} else {
208			// where PPCGOT begins in .got
209			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
210			assert(offset % 4 == 0);
211			uint32_t glink_vma;
212			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
213				error(0, 0, "couldn't read glink VMA address"
214				      " at %zd@GOT", offset);
215				return 0;
216			}
217			if (glink_vma != 0) {
218				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
219				      glink_vma);
220				return (GElf_Addr)glink_vma;
221			}
222		}
223	}
224
225	if (plt_data != NULL) {
226		uint32_t glink_vma;
227		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
228			error(0, 0, "couldn't read glink VMA address");
229			return 0;
230		}
231		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
232		return (GElf_Addr)glink_vma;
233	}
234
235	return 0;
236}
237
238static int
239load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
240{
241	Elf_Scn *scn;
242	GElf_Shdr shdr;
243	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
244	    || scn == NULL) {
245	fail:
246		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
247		      elf_errmsg(-1));
248		return -1;
249	}
250
251	Elf_Data *data = elf_loaddata(scn, &shdr);
252	if (data == NULL)
253		goto fail;
254
255	size_t j;
256	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
257		GElf_Dyn dyn;
258		if (gelf_getdyn(data, j, &dyn) == NULL)
259			goto fail;
260
261		if(dyn.d_tag == tag) {
262			*valuep = dyn.d_un.d_ptr;
263			return 0;
264		}
265	}
266
267	return -1;
268}
269
270static int
271load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
272{
273	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
274}
275
276static int
277load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp)
278{
279	return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp);
280}
281
282int
283arch_elf_init(struct ltelf *lte)
284{
285	lte->arch.secure_plt = !(lte->lte_flags & LTE_PLT_EXECUTABLE);
286	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
287		GElf_Addr ppcgot;
288		if (load_ppcgot(lte, &ppcgot) < 0) {
289			error(0, 0, "couldn't find DT_PPC_GOT");
290			return -1;
291		}
292		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
293
294		assert (lte->relplt_size % 12 == 0);
295		size_t count = lte->relplt_size / 12; // size of RELA entry
296		lte->arch.plt_stub_vma = glink_vma
297			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
298		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
299
300	} else if (lte->ehdr.e_machine == EM_PPC64) {
301		GElf_Addr glink_vma;
302		if (load_ppc64_glink(lte, &glink_vma) < 0) {
303			error(0, 0, "couldn't find DT_PPC64_GLINK");
304			return -1;
305		}
306
307		/* The first glink stub starts at offset 32.  */
308		lte->arch.plt_stub_vma = glink_vma + 32;
309	}
310
311	/* Override the value that we gleaned from flags on the .plt
312	 * section.  The PLT entries are in fact executable, they are
313	 * just not in .plt.  */
314	lte->lte_flags |= LTE_PLT_EXECUTABLE;
315
316	/* On PPC64, look for stub symbols in symbol table.  These are
317	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
318	if (lte->ehdr.e_machine == EM_PPC64
319	    && lte->symtab != NULL && lte->strtab != NULL) {
320
321		/* N.B. We can't simply skip the symbols that we fail
322		 * to read or malloc.  There may be more than one stub
323		 * per symbol name, and if we failed in one but
324		 * succeeded in another, the PLT enabling code would
325		 * have no way to tell that something is missing.  We
326		 * could work around that, of course, but it doesn't
327		 * seem worth the trouble.  So if anything fails, we
328		 * just pretend that we don't have stub symbols at
329		 * all, as if the binary is stripped.  */
330
331		size_t i;
332		for (i = 0; i < lte->symtab_count; ++i) {
333			GElf_Sym sym;
334			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
335				struct library_symbol *sym, *next;
336			fail:
337				for (sym = lte->arch.stubs; sym != NULL; ) {
338					next = sym->next;
339					library_symbol_destroy(sym);
340					free(sym);
341					sym = next;
342				}
343				lte->arch.stubs = NULL;
344				break;
345			}
346
347			const char *name = lte->strtab + sym.st_name;
348
349#define STUBN ".plt_call."
350			if ((name = strstr(name, STUBN)) == NULL)
351				continue;
352			name += sizeof(STUBN) - 1;
353#undef STUBN
354
355			size_t len;
356			const char *ver = strchr(name, '@');
357			if (ver != NULL) {
358				len = ver - name;
359
360			} else {
361				/* If there is "+" at all, check that
362				 * the symbol name ends in "+0".  */
363				const char *add = strrchr(name, '+');
364				if (add != NULL) {
365					assert(strcmp(add, "+0") == 0);
366					len = add - name;
367				} else {
368					len = strlen(name);
369				}
370			}
371
372			char *sym_name = strndup(name, len);
373			struct library_symbol *libsym = malloc(sizeof(*libsym));
374			if (sym_name == NULL || libsym == NULL) {
375				free(sym_name);
376				free(libsym);
377				goto fail;
378			}
379
380			target_address_t addr
381				= (target_address_t)sym.st_value + lte->bias;
382			library_symbol_init(libsym, addr, sym_name, 1,
383					    LS_TOPLT_EXEC);
384			libsym->arch.type = PPC64PLT_STUB;
385			libsym->next = lte->arch.stubs;
386			lte->arch.stubs = libsym;
387		}
388	}
389
390	return 0;
391}
392
393static int
394read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp)
395{
396	/* on PPC32 we need to do things differently, but PPC64/PPC32
397	 * is currently not supported anyway.  */
398	assert(host_powerpc64());
399
400	long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
401	if (l == -1 && errno != 0) {
402		error(0, errno, "ptrace .plt slot value @%#" PRIx64, addr);
403		return -1;
404	}
405
406	*valp = (GElf_Addr)l;
407	return 0;
408}
409
410static int
411unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value)
412{
413	/* We only modify plt_entry[0], which holds the resolved
414	 * address of the routine.  We keep the TOC and environment
415	 * pointers intact.  Hence the only adjustment that we need to
416	 * do is to IP.  */
417	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
418		error(0, errno, "unresolve .plt slot");
419		return -1;
420	}
421	return 0;
422}
423
424enum plt_status
425arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
426		       const char *a_name, GElf_Rela *rela, size_t ndx,
427		       struct library_symbol **ret)
428{
429	if (lte->ehdr.e_machine == EM_PPC)
430		return plt_default;
431
432	/* PPC64.  If we have stubs, we return a chain of breakpoint
433	 * sites, one for each stub that corresponds to this PLT
434	 * entry.  */
435	struct library_symbol *chain = NULL;
436	struct library_symbol **symp;
437	for (symp = &lte->arch.stubs; *symp != NULL; ) {
438		struct library_symbol *sym = *symp;
439		if (strcmp(sym->name, a_name) != 0) {
440			symp = &(*symp)->next;
441			continue;
442		}
443
444		/* Re-chain the symbol from stubs to CHAIN.  */
445		*symp = sym->next;
446		sym->next = chain;
447		chain = sym;
448	}
449
450	if (chain != NULL) {
451		*ret = chain;
452		return plt_ok;
453	}
454
455	/* We don't have stub symbols.  Find corresponding .plt slot,
456	 * and check whether it contains the corresponding PLT address
457	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
458	 * want read this from ELF file, but from process image.  That
459	 * makes a difference if we are attaching to a running
460	 * process.  */
461
462	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
463	GElf_Addr plt_slot_addr = rela->r_offset;
464	assert(plt_slot_addr >= lte->plt_addr
465	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
466
467	GElf_Addr plt_slot_value;
468	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
469		return plt_fail;
470
471	char *name = strdup(a_name);
472	struct library_symbol *libsym = malloc(sizeof(*libsym));
473	if (name == NULL || libsym == NULL) {
474		error(0, errno, "allocation for .plt slot");
475	fail:
476		free(name);
477		free(libsym);
478		return plt_fail;
479	}
480
481	library_symbol_init(libsym, (target_address_t)plt_entry_addr,
482			    name, 1, LS_TOPLT_EXEC);
483	libsym->arch.plt_slot_addr = plt_slot_addr;
484
485	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
486		libsym->arch.type = PPC64PLT_UNRESOLVED;
487		libsym->arch.resolved_value = plt_entry_addr;
488
489	} else {
490		/* Unresolve the .plt slot.  If the binary was
491		 * prelinked, this makes the code invalid, because in
492		 * case of prelinked binary, the dynamic linker
493		 * doesn't update .plt[0] and .plt[1] with addresses
494		 * of the resover.  But we don't care, we will never
495		 * need to enter the resolver.  That just means that
496		 * we have to un-un-resolve this back before we
497		 * detach, which is nothing new: we already need to
498		 * retract breakpoints.  */
499
500		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0)
501			goto fail;
502		libsym->arch.type = PPC64PLT_RESOLVED;
503		libsym->arch.resolved_value = plt_slot_value;
504	}
505
506	*ret = libsym;
507	return plt_ok;
508}
509
510void
511arch_elf_destroy(struct ltelf *lte)
512{
513	struct library_symbol *sym;
514	for (sym = lte->arch.stubs; sym != NULL; ) {
515		struct library_symbol *next = sym->next;
516		library_symbol_destroy(sym);
517		free(sym);
518		sym = next;
519	}
520}
521
522static enum callback_status
523keep_stepping_p(struct process_stopping_handler *self)
524{
525	struct Process *proc = self->task_enabling_breakpoint;
526	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
527	GElf_Addr value;
528	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
529		return CBS_FAIL;
530
531	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
532	 * the PLT entry value.  */
533	if (value == libsym->arch.resolved_value)
534		return CBS_CONT;
535
536	/* The .plt slot got resolved!  We can migrate the breakpoint
537	 * to RESOLVED and stop single-stepping.  */
538	if (unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
539			       libsym->arch.resolved_value) < 0)
540		return CBS_FAIL;
541	libsym->arch.type = PPC64PLT_RESOLVED;
542	libsym->arch.resolved_value = value;
543
544	return CBS_STOP;
545}
546
547static void
548ppc64_plt_bp_continue(struct breakpoint *bp, struct Process *proc)
549{
550	switch (bp->libsym->arch.type) {
551		target_address_t rv;
552
553	case PPC64PLT_STUB:
554		/* We should never get here.  */
555		abort();
556
557	case PPC64PLT_UNRESOLVED:
558		if (process_install_stopping_handler(proc, bp, NULL,
559						     &keep_stepping_p,
560						     NULL) < 0) {
561			perror("ppc64_unresolved_bp_continue: couldn't install"
562			       " event handler");
563			continue_after_breakpoint(proc, bp);
564		}
565		return;
566
567	case PPC64PLT_RESOLVED:
568		rv = (target_address_t)bp->libsym->arch.resolved_value;
569		set_instruction_pointer(proc, rv);
570		continue_process(proc->pid);
571	}
572}
573
574/* For some symbol types, we need to set up custom callbacks.  XXX we
575 * don't need PROC here, we can store the data in BP if it is of
576 * interest to us.  */
577int
578arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
579{
580	if (proc->e_machine == EM_PPC
581	    || bp->libsym == NULL)
582		return 0;
583
584	/* We could see LS_TOPLT_EXEC or LS_TOPLT_NONE (the latter
585	 * when we trace entry points), but not LS_TOPLT_POINT
586	 * anywhere on PPC.  */
587	assert(bp->libsym->plt_type != LS_TOPLT_POINT);
588	if (bp->libsym->plt_type != LS_TOPLT_EXEC
589	    || bp->libsym->arch.type == PPC64PLT_STUB)
590		return 0;
591
592	static struct bp_callbacks cbs = {
593		.on_continue = ppc64_plt_bp_continue,
594	};
595	breakpoint_set_callbacks(bp, &cbs);
596	return 0;
597}
598
/* Architecture hook run when breakpoint BP is destroyed.  This back
 * end has nothing to release.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
	/* Intentionally empty.  */
	(void)bp;
}
603