plt.c revision d1746d17eda0c4d2c1004c9deb8b229eb6fb1c78
1#include <gelf.h>
2#include <sys/ptrace.h>
3#include <errno.h>
4#include <error.h>
5#include <inttypes.h>
6#include <assert.h>
7#include <string.h>
8
9#include "proc.h"
10#include "common.h"
11#include "library.h"
12
13/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
14 * new-style "secure" PLT.  We can tell one from the other by the
15 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
16 * otherwise it's secure.
17 *
18 * BSS PLT works the same way as most architectures: the .plt section
19 * contains trampolines and we put breakpoints to those.  With secure
20 * PLT, the .plt section doesn't contain instructions but addresses.
21 * The real PLT table is stored in .text.  Addresses of those PLT
22 * entries can be computed, and in fact that's what the glink deal
23 * below does.
24 *
25 * If not prelinked, BSS PLT entries in the .plt section contain
26 * zeroes that are overwritten by the dynamic linker during start-up.
27 * For that reason, ltrace realizes those breakpoints only after
28 * .start is hit.
29 *
30 * 64-bit PPC is more involved.  Program linker creates for each
31 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
32 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
33 * dispatch: it loads an address of a function to call from the
34 * section .plt, and branches.  PLT entries themselves are essentially
35 * a curried call to the resolver.  When the symbol is resolved, the
36 * resolver updates the value stored in .plt, and the next time
37 * around, the stub calls the library function directly.  So we make
38 * at most one trip (none if the binary is prelinked) through each PLT
39 * entry, and correspondingly that is useless as a breakpoint site.
40 *
41 * Note the three confusing terms: stubs (that play the role of PLT
42 * entries), PLT entries, .plt section.
43 *
44 * We first check symbol tables and see if we happen to have stub
45 * symbols available.  If yes we just put breakpoints to those, and
46 * treat them as usual breakpoints.  The only tricky part is realizing
47 * that there can be more than one breakpoint per symbol.
48 *
49 * The case that we don't have the stub symbols available is harder.
50 * The following scheme uses two kinds of PLT breakpoints: unresolved
51 * and resolved (to some address).  When the process starts (or when
52 * we attach), we distribute unresolved PLT breakpoints to the PLT
53 * entries (not stubs).  Then we look in .plt, and for each entry
54 * whose value is different than the corresponding PLT entry address,
55 * we assume it was already resolved, and convert the breakpoint to
56 * resolved.  We also rewrite the resolved value in .plt back to the
57 * PLT address.
58 *
59 * When a PLT entry hits a resolved breakpoint (which happens because
60 * we put back the unresolved addresses to .plt), we move the
61 * instruction pointer to the corresponding address and continue the
62 * process as if nothing happened.
63 *
64 * When an unresolved PLT entry is called for the first time, we need to
65 * catch the new value that the resolver will write to a .plt slot.
66 * We also need to prevent another thread from racing through and
67 * taking the branch without ltrace noticing.  So when an unresolved PLT
68 * entry hits, we have to stop all threads.  We then single-step
69 * through the resolver, until the .plt slot changes.  When it does,
70 * we treat it the same way as above: convert the PLT breakpoint to
71 * resolved, and rewrite the .plt value back to PLT address.  We then
72 * start all threads again.
73 *
74 * In theory we might find the exact instruction that will update the
75 * .plt slot, and emulate it, updating the PLT breakpoint immediately,
76 * and then just skip it.  But that's even messier than the thread
77 * stopping business and single stepping that needs to be done.
78 */
79
/* Stride between consecutive PLT stubs: arch_plt_sym_val() adds
 * PPC_PLT_STUB_SIZE * ndx to the stub base address, and
 * arch_elf_init() subtracts count * PPC_PLT_STUB_SIZE from the glink
 * address to find that base.  */
80#define PPC_PLT_STUB_SIZE 16
81
/* Tell whether this build of ltrace targets a 64-bit PowerPC host.
 * Returns 1 when the compiler defines __powerpc64__, 0 otherwise.
 * Declared with an explicit (void) so that the compiler rejects
 * calls that pass arguments.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
91
92GElf_Addr
93arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
94{
95	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
96		assert(lte->arch.plt_stub_vma != 0);
97		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
98
99	} else if (lte->ehdr.e_machine == EM_PPC) {
100		return rela->r_offset;
101
102	} else {
103		assert(lte->ehdr.e_machine == EM_PPC64);
104		fprintf(stderr, "PPC64\n");
105		abort();
106		return rela->r_offset;
107	}
108}
109
110int
111arch_translate_address(struct Process *proc,
112		       target_address_t addr, target_address_t *ret)
113{
114	if (host_powerpc64() && proc->e_machine == EM_PPC64) {
115		long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
116		fprintf(stderr, "arch_translate_address %p->%#lx\n",
117			addr, l);
118		if (l == -1 && errno) {
119			error(0, errno, ".opd translation of %p", addr);
120			return -1;
121		}
122		*ret = (target_address_t)l;
123		return 0;
124	}
125
126	*ret = addr;
127	return 0;
128}
129
130/* XXX Apparently PPC64 doesn't support PLT breakpoints.  */
131void *
132sym2addr(Process *proc, struct library_symbol *sym) {
133	void *addr = sym->enter_addr;
134	long pt_ret;
135
136	debug(3, 0);
137
138	if (sym->plt_type != LS_TOPLT_POINT) {
139		return addr;
140	}
141
142	if (proc->pid == 0) {
143		return 0;
144	}
145
146	if (options.debug >= 3) {
147		xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0),
148			 sizeof(void*)*8);
149	}
150
151	// On a PowerPC-64 system, a plt is three 64-bit words: the first is the
152	// 64-bit address of the routine.  Before the PLT has been initialized,
153	// this will be 0x0. In fact, the symbol table won't have the plt's
154	// address even.  Ater the PLT has been initialized, but before it has
155	// been resolved, the first word will be the address of the function in
156	// the dynamic linker that will reslove the PLT.  After the PLT is
157	// resolved, this will will be the address of the routine whose symbol
158	// is in the symbol table.
159
160	// On a PowerPC-32 system, there are two types of PLTs: secure (new) and
161	// non-secure (old).  For the secure case, the PLT is simply a pointer
162	// and we can treat it much as we do for the PowerPC-64 case.  For the
163	// non-secure case, the PLT is executable code and we can put the
164	// break-point right in the PLT.
165
166	pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
167
168#if SIZEOF_LONG == 8
169	if (proc->mask_32bit) {
170		// Assume big-endian.
171		addr = (void *)((pt_ret >> 32) & 0xffffffff);
172	} else {
173		addr = (void *)pt_ret;
174	}
175#else
176	/* XXX Um, so where exactly are we dealing with the non-secure
177	   PLT thing?  */
178	addr = (void *)pt_ret;
179#endif
180
181	return addr;
182}
183
184static GElf_Addr
185get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
186{
187	Elf_Scn *ppcgot_sec = NULL;
188	GElf_Shdr ppcgot_shdr;
189	if (ppcgot != 0
190	    && elf_get_section_covering(lte, ppcgot,
191					&ppcgot_sec, &ppcgot_shdr) < 0)
192		// xxx should be the log out
193		fprintf(stderr,
194			"DT_PPC_GOT=%#" PRIx64 ", but no such section found.\n",
195			ppcgot);
196
197	if (ppcgot_sec != NULL) {
198		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
199		if (data == NULL || data->d_size < 8 ) {
200			fprintf(stderr, "Couldn't read GOT data.\n");
201		} else {
202			// where PPCGOT begins in .got
203			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
204			assert(offset % 4 == 0);
205			uint32_t glink_vma;
206			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
207				fprintf(stderr,
208					"Couldn't read glink VMA address"
209					" at %zd@GOT\n", offset);
210				return 0;
211			}
212			if (glink_vma != 0) {
213				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
214				      glink_vma);
215				fprintf(stderr, "PPC GOT glink_vma "
216					"address: %#"PRIx32"\n", glink_vma);
217				return (GElf_Addr)glink_vma;
218			}
219		}
220	}
221
222	if (plt_data != NULL) {
223		uint32_t glink_vma;
224		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
225			fprintf(stderr,
226				"Couldn't read glink VMA address at 0@.plt\n");
227			return 0;
228		}
229		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
230		fprintf(stderr, ".plt glink_vma address: "
231			"%#"PRIx32"\n", glink_vma);
232		return (GElf_Addr)glink_vma;
233	}
234
235	return 0;
236}
237
238static int
239load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
240{
241	Elf_Scn *scn;
242	GElf_Shdr shdr;
243	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
244	    || scn == NULL) {
245	fail:
246		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
247		      elf_errmsg(-1));
248		return -1;
249	}
250
251	Elf_Data *data = elf_loaddata(scn, &shdr);
252	if (data == NULL)
253		goto fail;
254
255	size_t j;
256	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
257		GElf_Dyn dyn;
258		if (gelf_getdyn(data, j, &dyn) == NULL)
259			goto fail;
260
261		if(dyn.d_tag == tag) {
262			*valuep = dyn.d_un.d_ptr;
263			return 0;
264		}
265	}
266
267	return -1;
268}
269
270static int
271load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
272{
273	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
274}
275
276int
277arch_elf_init(struct ltelf *lte)
278{
279	lte->arch.secure_plt = !(lte->lte_flags & LTE_PLT_EXECUTABLE);
280	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
281		GElf_Addr ppcgot;
282		if (load_ppcgot(lte, &ppcgot) < 0) {
283			fprintf(stderr, "Couldn't find DT_PPC_GOT.\n");
284			return -1;
285		}
286		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
287
288		assert (lte->relplt_size % 12 == 0);
289		size_t count = lte->relplt_size / 12; // size of RELA entry
290		lte->arch.plt_stub_vma = glink_vma
291			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
292		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
293	}
294
295	/* Override the value that we gleaned from flags on the .plt
296	 * section.  The PLT entries are in fact executable, they are
297	 * just not in .plt.  */
298	lte->lte_flags |= LTE_PLT_EXECUTABLE;
299
300	/* On PPC64, look for stub symbols in symbol table.  These are
301	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
302	if (lte->ehdr.e_machine == EM_PPC64
303	    && lte->symtab != NULL && lte->strtab != NULL) {
304
305		/* N.B. We can't simply skip the symbols that we fail
306		 * to read or malloc.  There may be more than one stub
307		 * per symbol name, and if we failed in one but
308		 * succeeded in another, the PLT enabling code would
309		 * have no way to tell that something is missing.  We
310		 * could work around that, of course, but it doesn't
311		 * seem worth the trouble.  So if anything fails, we
312		 * just pretend that we don't have stub symbols at
313		 * all, as if the binary is stripped.  */
314
315		size_t i;
316		for (i = 0; i < lte->symtab_count; ++i) {
317			GElf_Sym sym;
318			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
319				struct library_symbol *sym, *next;
320			fail:
321				for (sym = lte->arch.stubs; sym != NULL; ) {
322					next = sym->next;
323					library_symbol_destroy(sym);
324					free(sym);
325					sym = next;
326				}
327				lte->arch.stubs = NULL;
328				break;
329			}
330
331			const char *name = lte->strtab + sym.st_name;
332
333#define STUBN ".plt_call."
334			if ((name = strstr(name, STUBN)) == NULL)
335				continue;
336			name += sizeof(STUBN) - 1;
337#undef STUBN
338
339			size_t len;
340			const char *ver = strchr(name, '@');
341			if (ver != NULL) {
342				len = ver - name;
343
344			} else {
345				/* If there is "+" at all, check that
346				 * the symbol name ends in "+0".  */
347				const char *add = strrchr(name, '+');
348				if (add != NULL) {
349					assert(strcmp(add, "+0") == 0);
350					len = add - name;
351				} else {
352					len = strlen(name);
353				}
354			}
355
356			char *sym_name = strndup(name, len);
357			struct library_symbol *libsym = malloc(sizeof(*libsym));
358			if (sym_name == NULL || libsym == NULL) {
359				free(sym_name);
360				free(libsym);
361				goto fail;
362			}
363
364			target_address_t addr
365				= (target_address_t)sym.st_value + lte->bias;
366			library_symbol_init(libsym, addr, sym_name, 1,
367					    LS_TOPLT_EXEC);
368			libsym->next = lte->arch.stubs;
369			lte->arch.stubs = libsym;
370		}
371	}
372
373	return 0;
374}
375
376enum plt_status
377arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
378		       const char *a_name, GElf_Rela *rela, size_t ndx,
379		       struct library_symbol **ret)
380{
381	if (lte->ehdr.e_machine == EM_PPC)
382		return plt_default;
383
384	/* PPC64.  If we have stubs, we return a chain of breakpoint
385	 * sites, one for each stub that corresponds to this PLT
386	 * entry.  */
387	struct library_symbol *chain = NULL;
388	struct library_symbol **symp;
389	for (symp = &lte->arch.stubs; *symp != NULL; ) {
390		struct library_symbol *sym = *symp;
391		if (strcmp(sym->name, a_name) != 0) {
392			symp = &(*symp)->next;
393			continue;
394		}
395
396		/* Re-chain the symbol from stubs to CHAIN.  */
397		*symp = sym->next;
398		sym->next = chain;
399		chain = sym;
400	}
401
402	if (chain != NULL) {
403		struct library_symbol *sym;
404		for (sym = chain; sym != NULL; sym = sym->next)
405			fprintf(stderr, "match %s --> %p\n",
406				sym->name, sym->enter_addr);
407		for (sym = lte->arch.stubs; sym != NULL; sym = sym->next)
408			fprintf(stderr, "remains %s --> %p\n",
409				sym->name, sym->enter_addr);
410
411		*ret = chain;
412		return plt_ok;
413	}
414
415	fprintf(stderr, "NO STUBS!\n");
416	abort();
417}
418
419void
420arch_elf_destroy(struct ltelf *lte)
421{
422	struct library_symbol *sym;
423	for (sym = lte->arch.stubs; sym != NULL; ) {
424		struct library_symbol *next = sym->next;
425		library_symbol_destroy(sym);
426		free(sym);
427		sym = next;
428	}
429}
430