plt.c revision b64b5c7b6f4a368ccaf60507090192845221a3be
1#include <gelf.h>
2#include <sys/ptrace.h>
3#include <errno.h>
4#include <error.h>
5#include <inttypes.h>
6#include <assert.h>
7#include <string.h>
8
9#include "proc.h"
10#include "common.h"
11#include "library.h"
12#include "breakpoint.h"
13
14/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
15 * new-style "secure" PLT.  We can tell one from the other by the
16 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
17 * otherwise it's secure.
18 *
19 * BSS PLT works the same way as most architectures: the .plt section
20 * contains trampolines and we put breakpoints to those.  With secure
21 * PLT, the .plt section doesn't contain instructions but addresses.
22 * The real PLT table is stored in .text.  Addresses of those PLT
23 * entries can be computed, and in fact that's what the glink deal
24 * below does.
25 *
26 * If not prelinked, BSS PLT entries in the .plt section contain
27 * zeroes that are overwritten by the dynamic linker during start-up.
28 * For that reason, ltrace realizes those breakpoints only after
29 * .start is hit.
30 *
31 * 64-bit PPC is more involved.  Program linker creates for each
32 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
33 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
34 * dispatch: it loads an address of a function to call from the
35 * section .plt, and branches.  PLT entries themselves are essentially
36 * a curried call to the resolver.  When the symbol is resolved, the
37 * resolver updates the value stored in .plt, and the next time
38 * around, the stub calls the library function directly.  So we make
39 * at most one trip (none if the binary is prelinked) through each PLT
40 * entry, and correspondingly that is useless as a breakpoint site.
41 *
42 * Note the three confusing terms: stubs (that play the role of PLT
43 * entries), PLT entries, .plt section.
44 *
45 * We first check symbol tables and see if we happen to have stub
46 * symbols available.  If yes we just put breakpoints to those, and
47 * treat them as usual breakpoints.  The only tricky part is realizing
48 * that there can be more than one breakpoint per symbol.
49 *
50 * The case that we don't have the stub symbols available is harder.
51 * The following scheme uses two kinds of PLT breakpoints: unresolved
52 * and resolved (to some address).  When the process starts (or when
53 * we attach), we distribute unresolved PLT breakpoints to the PLT
54 * entries (not stubs).  Then we look in .plt, and for each entry
55 * whose value is different than the corresponding PLT entry address,
56 * we assume it was already resolved, and convert the breakpoint to
57 * resolved.  We also rewrite the resolved value in .plt back to the
58 * PLT address.
59 *
60 * When a PLT entry hits a resolved breakpoint (which happens because
61 * we put back the unresolved addresses to .plt), we move the
62 * instruction pointer to the corresponding address and continue the
63 * process as if nothing happened.
64 *
65 * When an unresolved PLT entry is called for the first time, we need to
66 * catch the new value that the resolver will write to a .plt slot.
67 * We also need to prevent another thread from racing through and
68 * taking the branch without ltrace noticing.  So when unresolved PLT
69 * entry hits, we have to stop all threads.  We then single-step
70 * through the resolver, until the .plt slot changes.  When it does,
71 * we treat it the same way as above: convert the PLT breakpoint to
72 * resolved, and rewrite the .plt value back to PLT address.  We then
73 * start all threads again.
74 *
75 * In theory we might find the exact instruction that will update the
76 * .plt slot, and emulate it, updating the PLT breakpoint immediately,
77 * and then just skip it.  But that's even messier than the thread
78 * stopping business and single stepping that needs to be done.
79 */
80
/* Byte stride between consecutive stubs.  arch_plt_sym_val multiplies
 * these by the relocation index and adds the result to plt_stub_vma:
 * PPC_PLT_STUB_SIZE for the 32-bit secure-PLT case,
 * PPC64_PLT_STUB_SIZE for the 64-bit case without stub symbols.
 * NOTE(review): the "xxx" marker suggests the 64-bit value is still
 * provisional -- confirm before relying on it.  */
#define PPC_PLT_STUB_SIZE 16
#define PPC64_PLT_STUB_SIZE 8 //xxx
83
/* Report whether this build targets a 64-bit PowerPC host.
 *
 * Returns 1 when the compiler defines __powerpc64__, 0 otherwise.
 * The parameter list is spelled (void) so that the declaration is a
 * real prototype and stray arguments are diagnosed.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
93
94GElf_Addr
95arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
96{
97	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
98		assert(lte->arch.plt_stub_vma != 0);
99		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
100
101	} else if (lte->ehdr.e_machine == EM_PPC) {
102		return rela->r_offset;
103
104	} else {
105		/* If we get here, we don't have stub symbols.  In
106		 * that case we put brakpoints to PLT entries the same
107		 * as the PPC32 secure PLT case does.  */
108		assert(lte->arch.plt_stub_vma != 0);
109		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
110	}
111}
112
113int
114arch_translate_address(struct Process *proc,
115		       target_address_t addr, target_address_t *ret)
116{
117	if (proc->e_machine == EM_PPC64) {
118		assert(host_powerpc64());
119		long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
120		fprintf(stderr, "arch_translate_address %p->%#lx\n",
121			addr, l);
122		if (l == -1 && errno) {
123			error(0, errno, ".opd translation of %p", addr);
124			return -1;
125		}
126		*ret = (target_address_t)l;
127		return 0;
128	}
129
130	*ret = addr;
131	return 0;
132}
133
134/* XXX Apparently PPC64 doesn't support PLT breakpoints.  */
135void *
136sym2addr(Process *proc, struct library_symbol *sym) {
137	void *addr = sym->enter_addr;
138	long pt_ret;
139
140	debug(3, 0);
141
142	if (sym->plt_type != LS_TOPLT_POINT) {
143		return addr;
144	}
145
146	if (proc->pid == 0) {
147		return 0;
148	}
149
150	if (options.debug >= 3) {
151		xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0),
152			 sizeof(void*)*8);
153	}
154
155	// On a PowerPC-64 system, a plt is three 64-bit words: the first is the
156	// 64-bit address of the routine.  Before the PLT has been initialized,
157	// this will be 0x0. In fact, the symbol table won't have the plt's
158	// address even.  Ater the PLT has been initialized, but before it has
159	// been resolved, the first word will be the address of the function in
160	// the dynamic linker that will reslove the PLT.  After the PLT is
161	// resolved, this will will be the address of the routine whose symbol
162	// is in the symbol table.
163
164	// On a PowerPC-32 system, there are two types of PLTs: secure (new) and
165	// non-secure (old).  For the secure case, the PLT is simply a pointer
166	// and we can treat it much as we do for the PowerPC-64 case.  For the
167	// non-secure case, the PLT is executable code and we can put the
168	// break-point right in the PLT.
169
170	pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
171
172#if SIZEOF_LONG == 8
173	if (proc->mask_32bit) {
174		// Assume big-endian.
175		addr = (void *)((pt_ret >> 32) & 0xffffffff);
176	} else {
177		addr = (void *)pt_ret;
178	}
179#else
180	/* XXX Um, so where exactly are we dealing with the non-secure
181	   PLT thing?  */
182	addr = (void *)pt_ret;
183#endif
184
185	return addr;
186}
187
188static GElf_Addr
189get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
190{
191	Elf_Scn *ppcgot_sec = NULL;
192	GElf_Shdr ppcgot_shdr;
193	if (ppcgot != 0
194	    && elf_get_section_covering(lte, ppcgot,
195					&ppcgot_sec, &ppcgot_shdr) < 0)
196		// xxx should be the log out
197		fprintf(stderr,
198			"DT_PPC_GOT=%#" PRIx64 ", but no such section found.\n",
199			ppcgot);
200
201	if (ppcgot_sec != NULL) {
202		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
203		if (data == NULL || data->d_size < 8 ) {
204			fprintf(stderr, "Couldn't read GOT data.\n");
205		} else {
206			// where PPCGOT begins in .got
207			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
208			assert(offset % 4 == 0);
209			uint32_t glink_vma;
210			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
211				fprintf(stderr,
212					"Couldn't read glink VMA address"
213					" at %zd@GOT\n", offset);
214				return 0;
215			}
216			if (glink_vma != 0) {
217				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
218				      glink_vma);
219				fprintf(stderr, "PPC GOT glink_vma "
220					"address: %#"PRIx32"\n", glink_vma);
221				return (GElf_Addr)glink_vma;
222			}
223		}
224	}
225
226	if (plt_data != NULL) {
227		uint32_t glink_vma;
228		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
229			fprintf(stderr,
230				"Couldn't read glink VMA address at 0@.plt\n");
231			return 0;
232		}
233		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
234		fprintf(stderr, ".plt glink_vma address: "
235			"%#"PRIx32"\n", glink_vma);
236		return (GElf_Addr)glink_vma;
237	}
238
239	return 0;
240}
241
242static int
243load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
244{
245	Elf_Scn *scn;
246	GElf_Shdr shdr;
247	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
248	    || scn == NULL) {
249	fail:
250		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
251		      elf_errmsg(-1));
252		return -1;
253	}
254
255	Elf_Data *data = elf_loaddata(scn, &shdr);
256	if (data == NULL)
257		goto fail;
258
259	size_t j;
260	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
261		GElf_Dyn dyn;
262		if (gelf_getdyn(data, j, &dyn) == NULL)
263			goto fail;
264
265		if(dyn.d_tag == tag) {
266			*valuep = dyn.d_un.d_ptr;
267			return 0;
268		}
269	}
270
271	return -1;
272}
273
274static int
275load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
276{
277	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
278}
279
280static int
281load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp)
282{
283	return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp);
284}
285
286int
287arch_elf_init(struct ltelf *lte)
288{
289	lte->arch.secure_plt = !(lte->lte_flags & LTE_PLT_EXECUTABLE);
290	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
291		GElf_Addr ppcgot;
292		if (load_ppcgot(lte, &ppcgot) < 0) {
293			fprintf(stderr, "Couldn't find DT_PPC_GOT.\n");
294			return -1;
295		}
296		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
297
298		assert (lte->relplt_size % 12 == 0);
299		size_t count = lte->relplt_size / 12; // size of RELA entry
300		lte->arch.plt_stub_vma = glink_vma
301			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
302		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
303
304	} else if (lte->ehdr.e_machine == EM_PPC64) {
305		GElf_Addr glink_vma;
306		if (load_ppc64_glink(lte, &glink_vma) < 0) {
307			fprintf(stderr, "Couldn't find DT_PPC64_GLINK.\n");
308			return -1;
309		}
310
311		/* The first glink stub starts at offset 32.  */
312		lte->arch.plt_stub_vma = glink_vma + 32;
313	}
314
315	/* Override the value that we gleaned from flags on the .plt
316	 * section.  The PLT entries are in fact executable, they are
317	 * just not in .plt.  */
318	lte->lte_flags |= LTE_PLT_EXECUTABLE;
319
320	/* On PPC64, look for stub symbols in symbol table.  These are
321	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
322	if (lte->ehdr.e_machine == EM_PPC64
323	    && lte->symtab != NULL && lte->strtab != NULL) {
324
325		/* N.B. We can't simply skip the symbols that we fail
326		 * to read or malloc.  There may be more than one stub
327		 * per symbol name, and if we failed in one but
328		 * succeeded in another, the PLT enabling code would
329		 * have no way to tell that something is missing.  We
330		 * could work around that, of course, but it doesn't
331		 * seem worth the trouble.  So if anything fails, we
332		 * just pretend that we don't have stub symbols at
333		 * all, as if the binary is stripped.  */
334
335		size_t i;
336		for (i = 0; i < lte->symtab_count; ++i) {
337			GElf_Sym sym;
338			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
339				struct library_symbol *sym, *next;
340			fail:
341				for (sym = lte->arch.stubs; sym != NULL; ) {
342					next = sym->next;
343					library_symbol_destroy(sym);
344					free(sym);
345					sym = next;
346				}
347				lte->arch.stubs = NULL;
348				break;
349			}
350
351			const char *name = lte->strtab + sym.st_name;
352
353#define STUBN ".plt_call."
354			if ((name = strstr(name, STUBN)) == NULL)
355				continue;
356			name += sizeof(STUBN) - 1;
357#undef STUBN
358
359			size_t len;
360			const char *ver = strchr(name, '@');
361			if (ver != NULL) {
362				len = ver - name;
363
364			} else {
365				/* If there is "+" at all, check that
366				 * the symbol name ends in "+0".  */
367				const char *add = strrchr(name, '+');
368				if (add != NULL) {
369					assert(strcmp(add, "+0") == 0);
370					len = add - name;
371				} else {
372					len = strlen(name);
373				}
374			}
375
376			char *sym_name = strndup(name, len);
377			struct library_symbol *libsym = malloc(sizeof(*libsym));
378			if (sym_name == NULL || libsym == NULL) {
379				free(sym_name);
380				free(libsym);
381				goto fail;
382			}
383
384			target_address_t addr
385				= (target_address_t)sym.st_value + lte->bias;
386			library_symbol_init(libsym, addr, sym_name, 1,
387					    LS_TOPLT_EXEC);
388			libsym->arch.type = PPC64PLT_STUB;
389			libsym->next = lte->arch.stubs;
390			lte->arch.stubs = libsym;
391		}
392	}
393
394	return 0;
395}
396
397enum plt_status
398arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
399		       const char *a_name, GElf_Rela *rela, size_t ndx,
400		       struct library_symbol **ret)
401{
402	if (lte->ehdr.e_machine == EM_PPC)
403		return plt_default;
404
405	/* PPC64.  If we have stubs, we return a chain of breakpoint
406	 * sites, one for each stub that corresponds to this PLT
407	 * entry.  */
408	struct library_symbol *chain = NULL;
409	struct library_symbol **symp;
410	for (symp = &lte->arch.stubs; *symp != NULL; ) {
411		struct library_symbol *sym = *symp;
412		if (strcmp(sym->name, a_name) != 0) {
413			symp = &(*symp)->next;
414			continue;
415		}
416
417		/* Re-chain the symbol from stubs to CHAIN.  */
418		*symp = sym->next;
419		sym->next = chain;
420		chain = sym;
421	}
422
423	if (chain != NULL) {
424		struct library_symbol *sym;
425		for (sym = chain; sym != NULL; sym = sym->next)
426			fprintf(stderr, "match %s --> %p\n",
427				sym->name, sym->enter_addr);
428		for (sym = lte->arch.stubs; sym != NULL; sym = sym->next)
429			fprintf(stderr, "remains %s --> %p\n",
430				sym->name, sym->enter_addr);
431
432		*ret = chain;
433		return plt_ok;
434	}
435
436	/* We don't have stub symbols.  Find corresponding .plt slot,
437	 * and check whether it contains the corresponding PLT address
438	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
439	 * want read this from ELF file, but from process image.  That
440	 * makes a difference if we are attaching to a running
441	 * process.  */
442
443	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
444	GElf_Addr plt_slot_addr = rela->r_offset;
445	assert(plt_slot_addr >= lte->plt_addr
446	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
447
448	long plt_slot_value = ptrace(PTRACE_PEEKTEXT, proc->pid,
449				     plt_slot_addr, 0);
450	if (plt_slot_value == -1 && errno != 0) {
451		error(0, errno, "ptrace .plt slot value @%#" PRIx64,
452		      plt_slot_addr);
453		return plt_fail;
454	}
455
456	char *name = strdup(a_name);
457	struct library_symbol *libsym = malloc(sizeof(*libsym));
458	if (name == NULL || libsym == NULL) {
459		error(0, errno, "allocation for .plt slot");
460	fail:
461		free(name);
462		free(libsym);
463		return plt_fail;
464	}
465
466	library_symbol_init(libsym, (target_address_t)plt_entry_addr,
467			    name, 1, LS_TOPLT_EXEC);
468	if ((GElf_Addr)plt_slot_value == plt_entry_addr
469	    || plt_slot_value == 0) {
470		libsym->arch.type = PPC64PLT_UNRESOLVED;
471		libsym->arch.orig_addr = 0;
472	} else {
473		/* Unresolve the .plt slot.  If the binary was
474		 * prelinked, this makes the code invalid, because in
475		 * case of prelinked binary, the dynamic linker
476		 * doesn't update .plt[0] and .plt[1] with addresses
477		 * of the resover.  But we don't care, we will never
478		 * need to enter the resolver.  That just means that
479		 * we have to un-un-resolve this back before we
480		 * detach, which is nothing new: we already need to
481		 * retract breakpoints.  */
482		/* We only modify plt_entry[0], which holds the
483		 * resolved address of the routine.  We keep the TOC
484		 * and environment pointers intact.  Hence the only
485		 * adjustment that we need to do is to IP.  */
486		if (ptrace(PTRACE_POKETEXT, proc->pid,
487			   plt_slot_addr, plt_entry_addr) < 0) {
488			error(0, errno, "unresolve .plt slot");
489			goto fail;
490		}
491		libsym->arch.type = PPC64PLT_RESOLVED;
492		libsym->arch.orig_addr = plt_slot_value;
493	}
494
495	*ret = libsym;
496	return plt_ok;
497}
498
499void
500arch_elf_destroy(struct ltelf *lte)
501{
502	struct library_symbol *sym;
503	for (sym = lte->arch.stubs; sym != NULL; ) {
504		struct library_symbol *next = sym->next;
505		library_symbol_destroy(sym);
506		free(sym);
507		sym = next;
508	}
509}
510
511static void
512ppc64_resolved_bp_continue(struct breakpoint *bp, struct Process *proc)
513{
514	fprintf(stderr, "ppc64_resolved_bp_continue\n");
515	set_instruction_pointer(proc,
516				(target_address_t)bp->libsym->arch.orig_addr);
517	continue_process(proc->pid);
518}
519
520int
521arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
522{
523	if (proc->e_machine == EM_PPC
524	    || bp->libsym == NULL
525	    || bp->libsym->arch.type == PPC64PLT_STUB)
526		return 0;
527
528	if (bp->libsym->arch.type == PPC64PLT_RESOLVED) {
529		fprintf(stderr, "arch_breakpoint_init RESOLVED\n");
530		static struct bp_callbacks resolved_cbs = {
531			.on_continue = ppc64_resolved_bp_continue,
532		};
533		breakpoint_set_callbacks(bp, &resolved_cbs);
534
535	} else {
536		fprintf(stderr, "arch_breakpoint_init UNRESOLVED\n");
537		fprintf(stderr, "a.k.a the insane case\n");
538		abort();
539	}
540
541	return 0;
542}
543
/* Architecture hook run when breakpoint BP is torn down.  It does
 * nothing.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
	(void)bp;
}
548