plt.c revision 54bb64cf2eae7a0daa4d17e980b743b8ae69413b
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2012,2013 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2004,2008,2009 Juan Cespedes
5 * Copyright (C) 2006 Paul Gilliam
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
20 * 02110-1301 USA
21 */
22
23#include <gelf.h>
24#include <sys/ptrace.h>
25#include <errno.h>
26#include <inttypes.h>
27#include <assert.h>
28#include <stdbool.h>
29#include <string.h>
30
31#include "proc.h"
32#include "common.h"
33#include "insn.h"
34#include "library.h"
35#include "breakpoint.h"
36#include "linux-gnu/trace.h"
37#include "backend.h"
38
39/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
40 * new-style "secure" PLT.  We can tell one from the other by the
41 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
42 * otherwise it's secure.
43 *
44 * BSS PLT works the same way as most architectures: the .plt section
45 * contains trampolines and we put breakpoints to those.  If not
46 * prelinked, .plt contains zeroes, and dynamic linker fills in the
47 * initial set of trampolines, which means that we need to delay
48 * enabling breakpoints until after binary entry point is hit.
49 * Additionally, after first call, dynamic linker updates .plt with
50 * branch to resolved address.  That means that on first hit, we must
51 * do something similar to the PPC64 gambit described below.
52 *
53 * With secure PLT, the .plt section doesn't contain instructions but
54 * addresses.  The real PLT table is stored in .text.  Addresses of
55 * those PLT entries can be computed, and apart from the fact that
56 * they are in .text, they are ordinary PLT entries.
57 *
58 * 64-bit PPC is more involved.  Program linker creates for each
59 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
60 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
61 * dispatch: it loads an address of a function to call from the
62 * section .plt, and branches.  PLT entries themselves are essentially
63 * a curried call to the resolver.  When the symbol is resolved, the
64 * resolver updates the value stored in .plt, and the next time
65 * around, the stub calls the library function directly.  So we make
66 * at most one trip (none if the binary is prelinked) through each PLT
67 * entry, and correspondingly that is useless as a breakpoint site.
68 *
69 * Note the three confusing terms: stubs (that play the role of PLT
70 * entries), PLT entries, .plt section.
71 *
72 * We first check symbol tables and see if we happen to have stub
73 * symbols available.  If yes we just put breakpoints to those, and
74 * treat them as usual breakpoints.  The only tricky part is realizing
75 * that there can be more than one breakpoint per symbol.
76 *
77 * The case that we don't have the stub symbols available is harder.
78 * The following scheme uses two kinds of PLT breakpoints: unresolved
79 * and resolved (to some address).  When the process starts (or when
80 * we attach), we distribute unresolved PLT breakpoints to the PLT
81 * entries (not stubs).  Then we look in .plt, and for each entry
82 * whose value is different than the corresponding PLT entry address,
83 * we assume it was already resolved, and convert the breakpoint to
84 * resolved.  We also rewrite the resolved value in .plt back to the
85 * PLT address.
86 *
87 * When a PLT entry hits a resolved breakpoint (which happens because
88 * we rewrite .plt with the original unresolved addresses), we move
89 * the instruction pointer to the corresponding address and continue
90 * the process as if nothing happened.
91 *
92 * When unresolved PLT entry is called for the first time, we need to
93 * catch the new value that the resolver will write to a .plt slot.
94 * We also need to prevent another thread from racing through and
95 * taking the branch without ltrace noticing.  So when unresolved PLT
96 * entry hits, we have to stop all threads.  We then single-step
97 * through the resolver, until the .plt slot changes.  When it does,
98 * we treat it the same way as above: convert the PLT breakpoint to
99 * resolved, and rewrite the .plt value back to PLT address.  We then
100 * start all threads again.
101 *
 * As an optimization, we remember the code address at which the slot
 * was resolved, and put a breakpoint there.  The next time around (when
104 * the next PLT entry is to be resolved), instead of single-stepping
105 * through half the dynamic linker, we just let the thread run and hit
106 * this breakpoint.  When it hits, we know the PLT entry was resolved.
107 *
108 * Another twist comes from tracing slots corresponding to
109 * R_PPC64_JMP_IREL relocations.  These have no dedicated PLT entry.
110 * The calls are done directly from stubs, and the .plt entry
111 * (actually .iplt entry, these live in a special section) is resolved
112 * in advance before the binary starts.  Because there's no PLT entry,
113 * we put the PLT breakpoints directly to the IFUNC resolver code, and
114 * then would like them to behave like ordinary PLT slots, including
115 * catching the point where these get resolved to unresolve them.  So
116 * for the first call (which is the actual resolver call), we pretend
117 * that this breakpoint is artificial and has no associated symbol,
118 * and turn it on fully only after the first hit.  Ideally we would
119 * trace that first call as well, but then the stepper, which tries to
120 * catch the point where the slot is resolved, would hit the return
121 * breakpoint and that's not currently handled well.
122 *
123 * On PPC32 with secure PLT, IFUNC symbols in main binary actually
124 * don't refer to the resolver itself.  Instead they refer to a PLT
125 * slot.
126 *
127 * XXX TODO If we have hardware watch point, we might put a read watch
128 * on .plt slot, and discover the offenders this way.  I don't know
129 * the details, but I assume at most a handful (like, one or two, if
130 * available at all) addresses may be watched at a time, and thus this
131 * would be used as an amendment of the above rather than full-on
132 * solution to PLT tracing on PPC.
133 */
134
/* Distance in bytes between consecutive PLT stubs on 32-bit PPC with
 * secure PLT; multiplied by the relocation index to locate a stub
 * relative to plt_stub_vma.  */
#define PPC_PLT_STUB_SIZE 16
/* Distance in bytes between consecutive PLT entries on PPC64, used
 * the same way relative to plt_stub_vma.
 * NOTE(review): the trailing "xxx" marker presumably flags this value
 * as provisional -- confirm.  */
#define PPC64_PLT_STUB_SIZE 8 //xxx
137
/* Tell whether this translation unit was compiled for a 64-bit
 * PowerPC host.  Returns 1 when __powerpc64__ is defined at compile
 * time, 0 otherwise.  Declared with an explicit (void) parameter list
 * so that the definition also serves as a prototype.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
147
148static void
149mark_as_resolved(struct library_symbol *libsym, GElf_Addr value)
150{
151	libsym->arch.type = PPC_PLT_RESOLVED;
152	libsym->arch.resolved_value = value;
153}
154
155static void
156ppc32_delayed_symbol(struct library_symbol *libsym)
157{
158	/* arch_dynlink_done is called on attach as well.  In that
159	 * case some slots will have been resolved already.
160	 * Unresolved PLT looks like this:
161	 *
162	 *    <sleep@plt>:	li      r11,0
163	 *    <sleep@plt+4>:	b       "resolve"
164	 *
165	 * "resolve" is another address in PLTGOT (the same block that
166	 * all the PLT slots are it).  When resolved, it looks either
167	 * this way:
168	 *
169	 *    <sleep@plt>:	b       0xfea88d0 <sleep>
170	 *
171	 * Which is easy to detect.  It can also look this way:
172	 *
173	 *    <sleep@plt>:	li      r11,0
174	 *    <sleep@plt+4>:	b       "dispatch"
175	 *
176	 * The "dispatch" address lies in PLTGOT as well.  In current
177	 * GNU toolchain, "dispatch" address is the same as PLTGOT
178	 * address.  We rely on this to figure out whether the address
179	 * is resolved or not.  */
180
181	uint32_t insn1 = libsym->arch.resolved_value >> 32;
182	uint32_t insn2 = (uint32_t) libsym->arch.resolved_value;
183	if ((insn1 & BRANCH_MASK) == B_INSN
184	    || ((insn2 & BRANCH_MASK) == B_INSN
185		/* XXX double cast  */
186		&& (ppc_branch_dest(libsym->enter_addr + 4, insn2)
187		    == (arch_addr_t) (long) libsym->lib->arch.pltgot_addr)))
188	{
189		mark_as_resolved(libsym, libsym->arch.resolved_value);
190	}
191}
192
193void
194arch_dynlink_done(struct process *proc)
195{
196	/* We may need to activate delayed symbols.  */
197	struct library_symbol *libsym = NULL;
198	while ((libsym = proc_each_symbol(proc, libsym,
199					  library_symbol_delayed_cb, NULL))) {
200		if (proc_read_64(proc, libsym->enter_addr,
201				 &libsym->arch.resolved_value) < 0) {
202			fprintf(stderr,
203				"couldn't read PLT value for %s(%p): %s\n",
204				libsym->name, libsym->enter_addr,
205				strerror(errno));
206			return;
207		}
208
209		if (proc->e_machine == EM_PPC)
210			ppc32_delayed_symbol(libsym);
211
212		if (proc_activate_delayed_symbol(proc, libsym) < 0)
213			return;
214
215		if (proc->e_machine == EM_PPC)
216			/* XXX double cast  */
217			libsym->arch.plt_slot_addr
218				= (GElf_Addr) (uintptr_t) libsym->enter_addr;
219	}
220}
221
222static bool
223reloc_is_irelative(int machine, GElf_Rela *rela)
224{
225	bool irelative = false;
226	if (machine == EM_PPC64) {
227#ifdef R_PPC64_JMP_IREL
228		irelative = GELF_R_TYPE(rela->r_info) == R_PPC64_JMP_IREL;
229#endif
230	} else {
231		assert(machine == EM_PPC);
232#ifdef R_PPC_IRELATIVE
233		irelative = GELF_R_TYPE(rela->r_info) == R_PPC_IRELATIVE;
234#endif
235	}
236	return irelative;
237}
238
239GElf_Addr
240arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
241{
242	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
243		assert(lte->arch.plt_stub_vma != 0);
244		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
245
246	} else if (lte->ehdr.e_machine == EM_PPC) {
247		return rela->r_offset;
248
249	/* Beyond this point, we are on PPC64, but don't have stub
250	 * symbols.  */
251
252	} else if (reloc_is_irelative(lte->ehdr.e_machine, rela)) {
253
254		/* Put JMP_IREL breakpoint to resolver, since there's
255		 * no dedicated PLT entry.  */
256
257		assert(rela->r_addend != 0);
258		/* XXX double cast */
259		arch_addr_t res_addr = (arch_addr_t) (uintptr_t) rela->r_addend;
260		if (arch_translate_address(lte, res_addr, &res_addr) < 0) {
261			fprintf(stderr, "Couldn't OPD-translate IRELATIVE "
262				"resolver address.\n");
263			return 0;
264		}
265		/* XXX double cast */
266		return (GElf_Addr) (uintptr_t) res_addr;
267
268	} else {
269		/* We put brakpoints to PLT entries the same as the
270		 * PPC32 secure PLT case does. */
271		assert(lte->arch.plt_stub_vma != 0);
272		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
273	}
274}
275
276/* This entry point is called when ltelf is not available
277 * anymore--during runtime.  At that point we don't have to concern
278 * ourselves with bias, as the values in OPD have been resolved
279 * already.  */
280int
281arch_translate_address_dyn(struct process *proc,
282			   arch_addr_t addr, arch_addr_t *ret)
283{
284	if (proc->e_machine == EM_PPC64) {
285		uint64_t value;
286		if (proc_read_64(proc, addr, &value) < 0) {
287			fprintf(stderr,
288				"dynamic .opd translation of %p: %s\n",
289				addr, strerror(errno));
290			return -1;
291		}
292		/* XXX The double cast should be removed when
293		 * arch_addr_t becomes integral type.  */
294		*ret = (arch_addr_t)(uintptr_t)value;
295		return 0;
296	}
297
298	*ret = addr;
299	return 0;
300}
301
302int
303arch_translate_address(struct ltelf *lte,
304		       arch_addr_t addr, arch_addr_t *ret)
305{
306	if (lte->ehdr.e_machine == EM_PPC64) {
307		/* XXX The double cast should be removed when
308		 * arch_addr_t becomes integral type.  */
309		GElf_Xword offset
310			= (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base;
311		uint64_t value;
312		if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
313			fprintf(stderr, "static .opd translation of %p: %s\n",
314				addr, elf_errmsg(-1));
315			return -1;
316		}
317		*ret = (arch_addr_t)(uintptr_t)(value + lte->bias);
318		return 0;
319	}
320
321	*ret = addr;
322	return 0;
323}
324
325static int
326load_opd_data(struct ltelf *lte, struct library *lib)
327{
328	Elf_Scn *sec;
329	GElf_Shdr shdr;
330	if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0
331	    || sec == NULL) {
332	fail:
333		fprintf(stderr, "couldn't find .opd data\n");
334		return -1;
335	}
336
337	lte->arch.opd_data = elf_rawdata(sec, NULL);
338	if (lte->arch.opd_data == NULL)
339		goto fail;
340
341	lte->arch.opd_base = shdr.sh_addr + lte->bias;
342	lte->arch.opd_size = shdr.sh_size;
343
344	return 0;
345}
346
347void *
348sym2addr(struct process *proc, struct library_symbol *sym)
349{
350	return sym->enter_addr;
351}
352
353static GElf_Addr
354get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
355{
356	Elf_Scn *ppcgot_sec = NULL;
357	GElf_Shdr ppcgot_shdr;
358	if (ppcgot != 0
359	    && (elf_get_section_covering(lte, ppcgot,
360					 &ppcgot_sec, &ppcgot_shdr) < 0
361		|| ppcgot_sec == NULL))
362		fprintf(stderr,
363			"DT_PPC_GOT=%#"PRIx64", but no such section found\n",
364			ppcgot);
365
366	if (ppcgot_sec != NULL) {
367		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
368		if (data == NULL || data->d_size < 8 ) {
369			fprintf(stderr, "couldn't read GOT data\n");
370		} else {
371			// where PPCGOT begins in .got
372			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
373			assert(offset % 4 == 0);
374			uint32_t glink_vma;
375			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
376				fprintf(stderr, "couldn't read glink VMA"
377					" address at %zd@GOT\n", offset);
378				return 0;
379			}
380			if (glink_vma != 0) {
381				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
382				      glink_vma);
383				return (GElf_Addr)glink_vma;
384			}
385		}
386	}
387
388	if (plt_data != NULL) {
389		uint32_t glink_vma;
390		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
391			fprintf(stderr, "couldn't read glink VMA address\n");
392			return 0;
393		}
394		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
395		return (GElf_Addr)glink_vma;
396	}
397
398	return 0;
399}
400
401static int
402load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
403{
404	Elf_Scn *scn;
405	GElf_Shdr shdr;
406	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
407	    || scn == NULL) {
408	fail:
409		fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
410			elf_errmsg(-1));
411		return -1;
412	}
413
414	Elf_Data *data = elf_loaddata(scn, &shdr);
415	if (data == NULL)
416		goto fail;
417
418	size_t j;
419	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
420		GElf_Dyn dyn;
421		if (gelf_getdyn(data, j, &dyn) == NULL)
422			goto fail;
423
424		if(dyn.d_tag == tag) {
425			*valuep = dyn.d_un.d_ptr;
426			return 0;
427		}
428	}
429
430	return -1;
431}
432
433static int
434nonzero_data(Elf_Data *data)
435{
436	/* We are not supposed to get here if there's no PLT.  */
437	assert(data != NULL);
438
439	unsigned char *buf = data->d_buf;
440	if (buf == NULL)
441		return 0;
442
443	size_t i;
444	for (i = 0; i < data->d_size; ++i)
445		if (buf[i] != 0)
446			return 1;
447	return 0;
448}
449
450static enum callback_status
451reloc_copy_if_irelative(GElf_Rela *rela, void *data)
452{
453	struct ltelf *lte = data;
454
455	return CBS_STOP_IF(reloc_is_irelative(lte->ehdr.e_machine, rela)
456			   && VECT_PUSHBACK(&lte->plt_relocs, rela) < 0);
457}
458
/* PowerPC-specific part of ELF initialization.
 *
 * Loads .opd data on PPC64, determines whether the PLT is "secure"
 * (the .plt section is not executable), records whether a PPC32 BSS
 * PLT looks prelinked, appends IRELATIVE relocations found through
 * DT_RELA to lte->plt_relocs, computes lte->arch.plt_stub_vma (or,
 * for PPC32 BSS, lib->arch.pltgot_addr), and on PPC64 collects
 * ".plt_call." stub symbols into lte->arch.stubs.
 *
 * Returns 0 on success and a negative value on failure.  A failure
 * while collecting stub symbols is not fatal: the stubs gathered so
 * far are dropped and 0 is still returned.  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
	if (lte->ehdr.e_machine == EM_PPC64
	    && load_opd_data(lte, lib) < 0)
		return -1;

	/* A non-executable .plt section means secure PLT.  */
	lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);

	/* For PPC32 BSS, it is important whether the binary was
	 * prelinked.  If .plt section is NODATA, or if it contains
	 * zeroes, then this library is not prelinked, and we need to
	 * delay breakpoints.  */
	if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt)
		lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data);
	else
		/* For cases where it's irrelevant, initialize the
		 * value to something conspicuous.  */
		lib->arch.bss_plt_prelinked = -1;

	/* On PPC64 and PPC32 secure, IRELATIVE relocations actually
	 * relocate .iplt section, and as such are stored in .rela.dyn
	 * (where all non-PLT relocations are stored) instead of
	 * .rela.plt.  Add these to lte->plt_relocs.  */

	GElf_Addr rela, relasz;
	Elf_Scn *rela_sec;
	GElf_Shdr rela_shdr;
	if ((lte->ehdr.e_machine == EM_PPC64 || lte->arch.secure_plt)
	    && load_dynamic_entry(lte, DT_RELA, &rela) == 0
	    && load_dynamic_entry(lte, DT_RELASZ, &relasz) == 0
	    && elf_get_section_covering(lte, rela, &rela_sec, &rela_shdr) == 0
	    && rela_sec != NULL) {

		/* Read all relocations of that section into a
		 * temporary vector and copy over the IRELATIVE
		 * ones.  */
		struct vect v;
		VECT_INIT(&v, GElf_Rela);
		int ret = elf_read_relocs(lte, rela_sec, &rela_shdr, &v);
		if (ret >= 0
		    && VECT_EACH(&v, GElf_Rela, NULL,
				 reloc_copy_if_irelative, lte) != NULL)
			ret = -1;

		VECT_DESTROY(&v, GElf_Rela, NULL, NULL);

		if (ret < 0)
			return ret;
	}

	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
		GElf_Addr ppcgot;
		if (load_dynamic_entry(lte, DT_PPC_GOT, &ppcgot) < 0) {
			fprintf(stderr, "couldn't find DT_PPC_GOT\n");
			return -1;
		}
		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);

		/* The stub table is taken to end at glink_vma, with
		 * one PPC_PLT_STUB_SIZE-sized stub per PLT
		 * relocation.  */
		size_t count = vect_size(&lte->plt_relocs);
		lte->arch.plt_stub_vma = glink_vma
			- (GElf_Addr) count * PPC_PLT_STUB_SIZE;
		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);

	} else if (lte->ehdr.e_machine == EM_PPC64) {
		GElf_Addr glink_vma;
		if (load_dynamic_entry(lte, DT_PPC64_GLINK, &glink_vma) < 0) {
			fprintf(stderr, "couldn't find DT_PPC64_GLINK\n");
			return -1;
		}

		/* The first glink stub starts at offset 32.  */
		lte->arch.plt_stub_vma = glink_vma + 32;

	} else {
		/* By exhaustion--PPC32 BSS.  */
		if (load_dynamic_entry(lte, DT_PLTGOT,
				       &lib->arch.pltgot_addr) < 0) {
			fprintf(stderr, "couldn't find DT_PLTGOT\n");
			return -1;
		}
	}

	/* On PPC64, look for stub symbols in symbol table.  These are
	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
	if (lte->ehdr.e_machine == EM_PPC64
	    && lte->symtab != NULL && lte->strtab != NULL) {

		/* N.B. We can't simply skip the symbols that we fail
		 * to read or malloc.  There may be more than one stub
		 * per symbol name, and if we failed in one but
		 * succeeded in another, the PLT enabling code would
		 * have no way to tell that something is missing.  We
		 * could work around that, of course, but it doesn't
		 * seem worth the trouble.  So if anything fails, we
		 * just pretend that we don't have stub symbols at
		 * all, as if the binary is stripped.  */

		size_t i;
		for (i = 0; i < lte->symtab_count; ++i) {
			GElf_Sym sym;
			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
				/* N.B. this SYM shadows the GElf_Sym
				 * declared above.  */
				struct library_symbol *sym, *next;
			/* Reached on gelf_getsym failure and by goto
			 * from fail2 below: drop every stub collected
			 * so far and stop scanning.  */
			fail:
				for (sym = lte->arch.stubs; sym != NULL; ) {
					next = sym->next;
					library_symbol_destroy(sym);
					free(sym);
					sym = next;
				}
				lte->arch.stubs = NULL;
				break;
			}

			const char *name = lte->strtab + sym.st_name;

			/* Only symbols containing ".plt_call." are of
			 * interest; NAME is advanced past that
			 * marker.  */
#define STUBN ".plt_call."
			if ((name = strstr(name, STUBN)) == NULL)
				continue;
			name += sizeof(STUBN) - 1;
#undef STUBN

			/* LEN is the length of the callee name, i.e.
			 * without any "@version" or "+0" suffix.  */
			size_t len;
			const char *ver = strchr(name, '@');
			if (ver != NULL) {
				len = ver - name;

			} else {
				/* If there is "+" at all, check that
				 * the symbol name ends in "+0".  */
				const char *add = strrchr(name, '+');
				if (add != NULL) {
					assert(strcmp(add, "+0") == 0);
					len = add - name;
				} else {
					len = strlen(name);
				}
			}

			char *sym_name = strndup(name, len);
			struct library_symbol *libsym = malloc(sizeof(*libsym));
			if (sym_name == NULL || libsym == NULL) {
			/* Release what was allocated for this symbol,
			 * then give up on stubs altogether.  */
			fail2:
				free(sym_name);
				free(libsym);
				goto fail;
			}

			/* XXX The double cast should be removed when
			 * arch_addr_t becomes integral type.  */
			arch_addr_t addr = (arch_addr_t)
				(uintptr_t)sym.st_value + lte->bias;
			if (library_symbol_init(libsym, addr, sym_name, 1,
						LS_TOPLT_EXEC) < 0)
				goto fail2;
			/* Prepend the new stub symbol to the list.  */
			libsym->arch.type = PPC64_PLT_STUB;
			libsym->next = lte->arch.stubs;
			lte->arch.stubs = libsym;
		}
	}

	return 0;
}
619
620static int
621read_plt_slot_value(struct process *proc, GElf_Addr addr, GElf_Addr *valp)
622{
623	/* On PPC64, we read from .plt, which contains 8 byte
624	 * addresses.  On PPC32 we read from .plt, which contains 4
625	 * byte instructions, but the PLT is two instructions, and
626	 * either can change.  */
627	uint64_t l;
628	/* XXX double cast.  */
629	if (proc_read_64(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) {
630		fprintf(stderr, "ptrace .plt slot value @%#" PRIx64": %s\n",
631			addr, strerror(errno));
632		return -1;
633	}
634
635	*valp = (GElf_Addr)l;
636	return 0;
637}
638
639static int
640unresolve_plt_slot(struct process *proc, GElf_Addr addr, GElf_Addr value)
641{
642	/* We only modify plt_entry[0], which holds the resolved
643	 * address of the routine.  We keep the TOC and environment
644	 * pointers intact.  Hence the only adjustment that we need to
645	 * do is to IP.  */
646	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
647		fprintf(stderr, "failed to unresolve .plt slot: %s\n",
648			strerror(errno));
649		return -1;
650	}
651	return 0;
652}
653
654enum plt_status
655arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
656		       const char *a_name, GElf_Rela *rela, size_t ndx,
657		       struct library_symbol **ret)
658{
659	bool is_irelative = reloc_is_irelative(lte->ehdr.e_machine, rela);
660	char *name;
661	if (! is_irelative) {
662		name = strdup(a_name);
663	} else {
664		GElf_Addr addr = lte->ehdr.e_machine == EM_PPC64
665			? (GElf_Addr) rela->r_addend
666			: arch_plt_sym_val(lte, ndx, rela);
667		name = linux_elf_find_irelative_name(lte, addr);
668	}
669
670	if (name == NULL) {
671	fail:
672		free(name);
673		return PLT_FAIL;
674	}
675
676	struct library_symbol *chain = NULL;
677	if (lte->ehdr.e_machine == EM_PPC) {
678		if (default_elf_add_plt_entry(proc, lte, name, rela, ndx,
679					      &chain) < 0)
680			goto fail;
681
682		if (! lte->arch.secure_plt) {
683			/* On PPC32 with BSS PLT, delay the symbol
684			 * until dynamic linker is done.  */
685			assert(!chain->delayed);
686			chain->delayed = 1;
687		}
688
689	ok:
690		*ret = chain;
691		free(name);
692		return PLT_OK;
693	}
694
695	/* PPC64.  If we have stubs, we return a chain of breakpoint
696	 * sites, one for each stub that corresponds to this PLT
697	 * entry.  */
698	struct library_symbol **symp;
699	for (symp = &lte->arch.stubs; *symp != NULL; ) {
700		struct library_symbol *sym = *symp;
701		if (strcmp(sym->name, name) != 0) {
702			symp = &(*symp)->next;
703			continue;
704		}
705
706		/* Re-chain the symbol from stubs to CHAIN.  */
707		*symp = sym->next;
708		sym->next = chain;
709		chain = sym;
710	}
711
712	if (chain != NULL)
713		goto ok;
714
715	/* We don't have stub symbols.  Find corresponding .plt slot,
716	 * and check whether it contains the corresponding PLT address
717	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
718	 * want read this from ELF file, but from process image.  That
719	 * makes a difference if we are attaching to a running
720	 * process.  */
721
722	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
723	GElf_Addr plt_slot_addr = rela->r_offset;
724
725	assert(plt_slot_addr >= lte->plt_addr
726	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
727
728	GElf_Addr plt_slot_value;
729	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
730		goto fail;
731
732	struct library_symbol *libsym = malloc(sizeof(*libsym));
733	if (libsym == NULL) {
734		fprintf(stderr, "allocation for .plt slot: %s\n",
735			strerror(errno));
736	fail2:
737		free(libsym);
738		goto fail;
739	}
740
741	/* XXX The double cast should be removed when
742	 * arch_addr_t becomes integral type.  */
743	if (library_symbol_init(libsym,
744				(arch_addr_t) (uintptr_t) plt_entry_addr,
745				name, 1, LS_TOPLT_EXEC) < 0)
746		goto fail2;
747	libsym->arch.plt_slot_addr = plt_slot_addr;
748
749	if (! is_irelative
750	    && (plt_slot_value == plt_entry_addr || plt_slot_value == 0)) {
751		libsym->arch.type = PPC_PLT_UNRESOLVED;
752		libsym->arch.resolved_value = plt_entry_addr;
753
754	} else {
755		/* Unresolve the .plt slot.  If the binary was
756		 * prelinked, this makes the code invalid, because in
757		 * case of prelinked binary, the dynamic linker
758		 * doesn't update .plt[0] and .plt[1] with addresses
759		 * of the resover.  But we don't care, we will never
760		 * need to enter the resolver.  That just means that
761		 * we have to un-un-resolve this back before we
762		 * detach.  */
763
764		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) {
765			library_symbol_destroy(libsym);
766			goto fail2;
767		}
768
769		if (! is_irelative) {
770			mark_as_resolved(libsym, plt_slot_value);
771		} else {
772			libsym->arch.type = PPC_PLT_IRELATIVE;
773			libsym->arch.resolved_value = plt_entry_addr;
774		}
775	}
776
777	*ret = libsym;
778	return PLT_OK;
779}
780
781void
782arch_elf_destroy(struct ltelf *lte)
783{
784	struct library_symbol *sym;
785	for (sym = lte->arch.stubs; sym != NULL; ) {
786		struct library_symbol *next = sym->next;
787		library_symbol_destroy(sym);
788		free(sym);
789		sym = next;
790	}
791}
792
793static void
794dl_plt_update_bp_on_hit(struct breakpoint *bp, struct process *proc)
795{
796	debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)",
797	      proc->pid, breakpoint_name(bp), bp->addr);
798	struct process_stopping_handler *self = proc->arch.handler;
799	assert(self != NULL);
800
801	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
802	GElf_Addr value;
803	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
804		return;
805
806	/* On PPC64, we rewrite the slot value.  */
807	if (proc->e_machine == EM_PPC64)
808		unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
809				   libsym->arch.resolved_value);
810	/* We mark the breakpoint as resolved on both arches.  */
811	mark_as_resolved(libsym, value);
812
813	/* cb_on_all_stopped looks if HANDLER is set to NULL as a way
814	 * to check that this was run.  It's an error if it
815	 * wasn't.  */
816	proc->arch.handler = NULL;
817
818	breakpoint_turn_off(bp, proc);
819}
820
821static void
822cb_on_all_stopped(struct process_stopping_handler *self)
823{
824	/* Put that in for dl_plt_update_bp_on_hit to see.  */
825	assert(self->task_enabling_breakpoint->arch.handler == NULL);
826	self->task_enabling_breakpoint->arch.handler = self;
827
828	linux_ptrace_disable_and_continue(self);
829}
830
831static enum callback_status
832cb_keep_stepping_p(struct process_stopping_handler *self)
833{
834	struct process *proc = self->task_enabling_breakpoint;
835	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
836
837	GElf_Addr value;
838	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
839		return CBS_FAIL;
840
841	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
842	 * the PLT entry value.  */
843	if (value == libsym->arch.resolved_value)
844		return CBS_CONT;
845
846	debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64,
847	      proc->pid, value);
848
849	/* The .plt slot got resolved!  We can migrate the breakpoint
850	 * to RESOLVED and stop single-stepping.  */
851	if (proc->e_machine == EM_PPC64
852	    && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
853				  libsym->arch.resolved_value) < 0)
854		return CBS_FAIL;
855
856	/* Resolving on PPC64 consists of overwriting a doubleword in
857	 * .plt.  That doubleword is than read back by a stub, and
858	 * jumped on.  Hopefully we can assume that double word update
859	 * is done on a single place only, as it contains a final
860	 * address.  We still need to look around for any sync
861	 * instruction, but essentially it is safe to optimize away
862	 * the single stepping next time and install a post-update
863	 * breakpoint.
864	 *
865	 * The situation on PPC32 BSS is more complicated.  The
866	 * dynamic linker here updates potentially several
867	 * instructions (XXX currently we assume two) and the rules
868	 * are more complicated.  Sometimes it's enough to adjust just
869	 * one of the addresses--the logic for generating optimal
870	 * dispatch depends on relative addresses of the .plt entry
871	 * and the jump destination.  We can't assume that the some
872	 * instruction block does the update every time.  So on PPC32,
873	 * we turn the optimization off and just step through it each
874	 * time.  */
875	if (proc->e_machine == EM_PPC)
876		goto done;
877
878	/* Install breakpoint to the address where the change takes
879	 * place.  If we fail, then that just means that we'll have to
880	 * singlestep the next time around as well.  */
881	struct process *leader = proc->leader;
882	if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
883		goto done;
884
885	/* We need to install to the next instruction.  ADDR points to
886	 * a store instruction, so moving the breakpoint one
887	 * instruction forward is safe.  */
888	arch_addr_t addr = get_instruction_pointer(proc) + 4;
889	leader->arch.dl_plt_update_bp = insert_breakpoint_at(proc, addr, NULL);
890	if (leader->arch.dl_plt_update_bp == NULL)
891		goto done;
892
893	static struct bp_callbacks dl_plt_update_cbs = {
894		.on_hit = dl_plt_update_bp_on_hit,
895	};
896	leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;
897
898	/* Turn it off for now.  We will turn it on again when we hit
899	 * the PLT entry that needs this.  */
900	breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);
901
902done:
903	mark_as_resolved(libsym, value);
904
905	return CBS_STOP;
906}
907
908static void
909jump_to_entry_point(struct process *proc, struct breakpoint *bp)
910{
911	/* XXX The double cast should be removed when
912	 * arch_addr_t becomes integral type.  */
913	arch_addr_t rv = (arch_addr_t)
914		(uintptr_t)bp->libsym->arch.resolved_value;
915	set_instruction_pointer(proc, rv);
916}
917
918static void
919ppc_plt_bp_continue(struct breakpoint *bp, struct process *proc)
920{
921	/* If this is a first call through IREL breakpoint, enable the
922	 * symbol so that it doesn't look like an artificial
923	 * breakpoint anymore.  */
924	if (bp->libsym == NULL) {
925		assert(bp->arch.irel_libsym != NULL);
926		bp->libsym = bp->arch.irel_libsym;
927		bp->arch.irel_libsym = NULL;
928	}
929
930	switch (bp->libsym->arch.type) {
931		struct process *leader;
932		void (*on_all_stopped)(struct process_stopping_handler *);
933		enum callback_status (*keep_stepping_p)
934			(struct process_stopping_handler *);
935
936	case PPC_DEFAULT:
937		assert(proc->e_machine == EM_PPC);
938		assert(bp->libsym != NULL);
939		assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
940		/* Fall through.  */
941
942	case PPC_PLT_IRELATIVE:
943	case PPC_PLT_UNRESOLVED:
944		on_all_stopped = NULL;
945		keep_stepping_p = NULL;
946		leader = proc->leader;
947
948		if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
949		    && breakpoint_turn_on(leader->arch.dl_plt_update_bp,
950					  proc) >= 0)
951			on_all_stopped = cb_on_all_stopped;
952		else
953			keep_stepping_p = cb_keep_stepping_p;
954
955		if (process_install_stopping_handler
956		    (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
957			fprintf(stderr,	"ppc_plt_bp_continue: "
958				"couldn't install event handler\n");
959			continue_after_breakpoint(proc, bp);
960		}
961		return;
962
963	case PPC_PLT_RESOLVED:
964		if (proc->e_machine == EM_PPC) {
965			continue_after_breakpoint(proc, bp);
966			return;
967		}
968
969		jump_to_entry_point(proc, bp);
970		continue_process(proc->pid);
971		return;
972
973	case PPC64_PLT_STUB:
974		/* These should never hit here.  */
975		break;
976	}
977
978	assert(bp->libsym->arch.type != bp->libsym->arch.type);
979	abort();
980}
981
982/* When a process is in a PLT stub, it may have already read the data
983 * in .plt that we changed.  If we detach now, it will jump to PLT
984 * entry and continue to the dynamic linker, where it will SIGSEGV,
985 * because zeroth .plt slot is not filled in prelinked binaries, and
986 * the dynamic linker needs that data.  Moreover, the process may
 * actually have hit the breakpoint already.  This function tries to
988 * detect both cases and do any fix-ups necessary to mend this
989 * situation.  */
990static enum callback_status
991detach_task_cb(struct process *task, void *data)
992{
993	struct breakpoint *bp = data;
994
995	if (get_instruction_pointer(task) == bp->addr) {
996		debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
997		      task->pid, bp->addr);
998		jump_to_entry_point(task, bp);
999		return CBS_CONT;
1000	}
1001
1002	/* XXX There's still a window of several instructions where we
1003	 * might catch the task inside a stub such that it has already
1004	 * read destination address from .plt, but hasn't jumped yet,
1005	 * thus avoiding the breakpoint.  */
1006
1007	return CBS_CONT;
1008}
1009
1010static void
1011ppc_plt_bp_retract(struct breakpoint *bp, struct process *proc)
1012{
1013	/* On PPC64, we rewrite .plt with PLT entry addresses.  This
1014	 * needs to be undone.  Unfortunately, the program may have
1015	 * made decisions based on that value */
1016	if (proc->e_machine == EM_PPC64
1017	    && bp->libsym != NULL
1018	    && bp->libsym->arch.type == PPC_PLT_RESOLVED) {
1019		each_task(proc->leader, NULL, detach_task_cb, bp);
1020		unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
1021				   bp->libsym->arch.resolved_value);
1022	}
1023}
1024
/* Initialize the arch-specific part of LIB.  Nothing is set up
 * here; the function always returns 0.  */
int
arch_library_init(struct library *lib)
{
	(void)lib;
	return 0;
}
1030
/* Release the arch-specific part of LIB.  There is nothing to
 * release, so this does nothing.  */
void
arch_library_destroy(struct library *lib)
{
	(void)lib;
}
1035
/* Clone the arch-specific part of LIB into RETP.  Nothing is copied
 * here; the function always returns 0.  */
int
arch_library_clone(struct library *retp, struct library *lib)
{
	(void)retp;
	(void)lib;
	return 0;
}
1041
1042int
1043arch_library_symbol_init(struct library_symbol *libsym)
1044{
1045	/* We set type explicitly in the code above, where we have the
1046	 * necessary context.  This is for calls from ltrace-elf.c and
1047	 * such.  */
1048	libsym->arch.type = PPC_DEFAULT;
1049	return 0;
1050}
1051
/* Release the arch-specific part of LIBSYM.  There is nothing to
 * release, so this does nothing.  */
void
arch_library_symbol_destroy(struct library_symbol *libsym)
{
	(void)libsym;
}
1056
1057int
1058arch_library_symbol_clone(struct library_symbol *retp,
1059			  struct library_symbol *libsym)
1060{
1061	retp->arch = libsym->arch;
1062	return 0;
1063}
1064
1065/* For some symbol types, we need to set up custom callbacks.  XXX we
1066 * don't need PROC here, we can store the data in BP if it is of
1067 * interest to us.  */
1068int
1069arch_breakpoint_init(struct process *proc, struct breakpoint *bp)
1070{
1071	bp->arch.irel_libsym = NULL;
1072
1073	/* Artificial and entry-point breakpoints are plain.  */
1074	if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
1075		return 0;
1076
1077	/* On PPC, secure PLT and prelinked BSS PLT are plain.  */
1078	if (proc->e_machine == EM_PPC
1079	    && bp->libsym->lib->arch.bss_plt_prelinked != 0)
1080		return 0;
1081
1082	/* On PPC64, stub PLT breakpoints are plain.  */
1083	if (proc->e_machine == EM_PPC64
1084	    && bp->libsym->arch.type == PPC64_PLT_STUB)
1085		return 0;
1086
1087	static struct bp_callbacks cbs = {
1088		.on_continue = ppc_plt_bp_continue,
1089		.on_retract = ppc_plt_bp_retract,
1090	};
1091	breakpoint_set_callbacks(bp, &cbs);
1092
1093	/* For JMP_IREL breakpoints, make the breakpoint look
1094	 * artificial by hiding the symbol.  */
1095	if (bp->libsym->arch.type == PPC_PLT_IRELATIVE) {
1096		bp->arch.irel_libsym = bp->libsym;
1097		bp->libsym = NULL;
1098	}
1099
1100	return 0;
1101}
1102
/* Release the arch-specific part of BP.  There is nothing to
 * release, so this does nothing.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
	(void)bp;
}
1107
1108int
1109arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
1110{
1111	retp->arch = sbp->arch;
1112	return 0;
1113}
1114
1115int
1116arch_process_init(struct process *proc)
1117{
1118	proc->arch.dl_plt_update_bp = NULL;
1119	proc->arch.handler = NULL;
1120	return 0;
1121}
1122
/* Release the arch-specific part of PROC.  There is nothing to
 * release, so this does nothing.  */
void
arch_process_destroy(struct process *proc)
{
	(void)proc;
}
1127
1128int
1129arch_process_clone(struct process *retp, struct process *proc)
1130{
1131	retp->arch = proc->arch;
1132	return 0;
1133}
1134
/* Called when PROC execs.  The arch-specific state is simply
 * initialized afresh; the result of that initialization is
 * returned.  */
int
arch_process_exec(struct process *proc)
{
	int status = arch_process_init(proc);

	return status;
}
1140