plt.c revision b5fd53993b71d0301b3547287f1a978679b21be2
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2012 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2004,2008,2009 Juan Cespedes
5 * Copyright (C) 2006 Paul Gilliam
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
20 * 02110-1301 USA
21 */
22
23#include <gelf.h>
24#include <sys/ptrace.h>
25#include <errno.h>
26#include <inttypes.h>
27#include <assert.h>
28#include <string.h>
29
30#include "proc.h"
31#include "common.h"
32#include "library.h"
33#include "breakpoint.h"
34#include "linux-gnu/trace.h"
35#include "backend.h"
36
37/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
38 * new-style "secure" PLT.  We can tell one from the other by the
39 * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
40 * otherwise it's secure.
41 *
42 * BSS PLT works the same way as most architectures: the .plt section
43 * contains trampolines and we put breakpoints to those.  If not
44 * prelinked, .plt contains zeroes, and dynamic linker fills in the
45 * initial set of trampolines, which means that we need to delay
46 * enabling breakpoints until after binary entry point is hit.
47 * Additionally, after first call, dynamic linker updates .plt with
48 * branch to resolved address.  That means that on first hit, we must
49 * do something similar to the PPC64 gambit described below.
50 *
51 * With secure PLT, the .plt section doesn't contain instructions but
52 * addresses.  The real PLT table is stored in .text.  Addresses of
53 * those PLT entries can be computed, and apart from the fact that
54 * they are in .text, they are ordinary PLT entries.
55 *
56 * 64-bit PPC is more involved.  Program linker creates for each
57 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
58 * (where xxxxxxxx is a hexadecimal number).  That stub does the call
59 * dispatch: it loads an address of a function to call from the
60 * section .plt, and branches.  PLT entries themselves are essentially
61 * a curried call to the resolver.  When the symbol is resolved, the
62 * resolver updates the value stored in .plt, and the next time
63 * around, the stub calls the library function directly.  So we make
64 * at most one trip (none if the binary is prelinked) through each PLT
65 * entry, and correspondingly that is useless as a breakpoint site.
66 *
67 * Note the three confusing terms: stubs (that play the role of PLT
68 * entries), PLT entries, .plt section.
69 *
70 * We first check symbol tables and see if we happen to have stub
71 * symbols available.  If yes we just put breakpoints to those, and
72 * treat them as usual breakpoints.  The only tricky part is realizing
73 * that there can be more than one breakpoint per symbol.
74 *
75 * The case that we don't have the stub symbols available is harder.
76 * The following scheme uses two kinds of PLT breakpoints: unresolved
77 * and resolved (to some address).  When the process starts (or when
78 * we attach), we distribute unresolved PLT breakpoints to the PLT
79 * entries (not stubs).  Then we look in .plt, and for each entry
80 * whose value is different than the corresponding PLT entry address,
81 * we assume it was already resolved, and convert the breakpoint to
82 * resolved.  We also rewrite the resolved value in .plt back to the
83 * PLT address.
84 *
85 * When a PLT entry hits a resolved breakpoint (which happens because
86 * we rewrite .plt with the original unresolved addresses), we move
87 * the instruction pointer to the corresponding address and continue
88 * the process as if nothing happened.
89 *
90 * When unresolved PLT entry is called for the first time, we need to
91 * catch the new value that the resolver will write to a .plt slot.
92 * We also need to prevent another thread from racing through and
93 * taking the branch without ltrace noticing.  So when unresolved PLT
94 * entry hits, we have to stop all threads.  We then single-step
95 * through the resolver, until the .plt slot changes.  When it does,
96 * we treat it the same way as above: convert the PLT breakpoint to
97 * resolved, and rewrite the .plt value back to PLT address.  We then
98 * start all threads again.
99 *
100 * As an optimization, we remember the address where the address was
101 * resolved, and put a breakpoint there.  The next time around (when
102 * the next PLT entry is to be resolved), instead of single-stepping
103 * through half the dynamic linker, we just let the thread run and hit
104 * this breakpoint.  When it hits, we know the PLT entry was resolved.
105 *
106 * XXX TODO If we have hardware watch point, we might put a read watch
107 * on .plt slot, and discover the offenders this way.  I don't know
108 * the details, but I assume at most a handful (like, one or two, if
109 * available at all) addresses may be watched at a time, and thus this
110 * would be used as an amendment of the above rather than full-on
111 * solution to PLT tracing on PPC.
112 */
113
114#define PPC_PLT_STUB_SIZE 16
115#define PPC64_PLT_STUB_SIZE 8 //xxx
116
/* Report whether ltrace itself was compiled for a 64-bit PowerPC
 * host.  Returns 1 when the compiler defines __powerpc64__, and 0
 * otherwise.  */
static inline int
host_powerpc64(void)
{
#ifdef __powerpc64__
	return 1;
#else
	return 0;
#endif
}
126
127int
128read_target_4(struct Process *proc, arch_addr_t addr, uint32_t *lp)
129{
130	unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
131	if (l == -1UL && errno)
132		return -1;
133#ifdef __powerpc64__
134	l >>= 32;
135#endif
136	*lp = l;
137	return 0;
138}
139
140static int
141read_target_8(struct Process *proc, arch_addr_t addr, uint64_t *lp)
142{
143	unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
144	if (l == -1UL && errno)
145		return -1;
146	if (host_powerpc64()) {
147		*lp = l;
148	} else {
149		unsigned long l2 = ptrace(PTRACE_PEEKTEXT, proc->pid,
150					  addr + 4, 0);
151		if (l2 == -1UL && errno)
152			return -1;
153		*lp = ((uint64_t)l << 32) | l2;
154	}
155	return 0;
156}
157
158int
159read_target_long(struct Process *proc, arch_addr_t addr, uint64_t *lp)
160{
161	if (proc->e_machine == EM_PPC) {
162		uint32_t w;
163		int ret = read_target_4(proc, addr, &w);
164		if (ret >= 0)
165			*lp = (uint64_t)w;
166		return ret;
167	} else {
168		return read_target_8(proc, addr, lp);
169	}
170}
171
172static void
173mark_as_resolved(struct library_symbol *libsym, GElf_Addr value)
174{
175	libsym->arch.type = PPC_PLT_RESOLVED;
176	libsym->arch.resolved_value = value;
177}
178
179void
180arch_dynlink_done(struct Process *proc)
181{
182	/* On PPC32 with BSS PLT, we need to enable delayed symbols.  */
183	struct library_symbol *libsym = NULL;
184	while ((libsym = proc_each_symbol(proc, libsym,
185					  library_symbol_delayed_cb, NULL))) {
186		if (read_target_8(proc, libsym->enter_addr,
187				  &libsym->arch.resolved_value) < 0) {
188			fprintf(stderr,
189				"couldn't read PLT value for %s(%p): %s\n",
190				libsym->name, libsym->enter_addr,
191				strerror(errno));
192			return;
193		}
194
195		if (proc_activate_delayed_symbol(proc, libsym) < 0)
196			return;
197		/* XXX double cast  */
198		libsym->arch.plt_slot_addr
199			= (GElf_Addr)(uintptr_t)libsym->enter_addr;
200	}
201}
202
203GElf_Addr
204arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
205{
206	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
207		assert(lte->arch.plt_stub_vma != 0);
208		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
209
210	} else if (lte->ehdr.e_machine == EM_PPC) {
211		return rela->r_offset;
212
213	} else {
214		/* If we get here, we don't have stub symbols.  In
215		 * that case we put brakpoints to PLT entries the same
216		 * as the PPC32 secure PLT case does.  */
217		assert(lte->arch.plt_stub_vma != 0);
218		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
219	}
220}
221
222/* This entry point is called when ltelf is not available
223 * anymore--during runtime.  At that point we don't have to concern
224 * ourselves with bias, as the values in OPD have been resolved
225 * already.  */
226int
227arch_translate_address_dyn(struct Process *proc,
228			   arch_addr_t addr, arch_addr_t *ret)
229{
230	if (proc->e_machine == EM_PPC64) {
231		uint64_t value;
232		if (read_target_8(proc, addr, &value) < 0) {
233			fprintf(stderr,
234				"dynamic .opd translation of %p: %s\n",
235				addr, strerror(errno));
236			return -1;
237		}
238		/* XXX The double cast should be removed when
239		 * arch_addr_t becomes integral type.  */
240		*ret = (arch_addr_t)(uintptr_t)value;
241		return 0;
242	}
243
244	*ret = addr;
245	return 0;
246}
247
248int
249arch_translate_address(struct ltelf *lte,
250		       arch_addr_t addr, arch_addr_t *ret)
251{
252	if (lte->ehdr.e_machine == EM_PPC64) {
253		/* XXX The double cast should be removed when
254		 * arch_addr_t becomes integral type.  */
255		GElf_Xword offset
256			= (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base;
257		uint64_t value;
258		if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
259			fprintf(stderr, "static .opd translation of %p: %s\n",
260				addr, elf_errmsg(-1));
261			return -1;
262		}
263		*ret = (arch_addr_t)(uintptr_t)(value + lte->bias);
264		return 0;
265	}
266
267	*ret = addr;
268	return 0;
269}
270
271static int
272load_opd_data(struct ltelf *lte, struct library *lib)
273{
274	Elf_Scn *sec;
275	GElf_Shdr shdr;
276	if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0) {
277	fail:
278		fprintf(stderr, "couldn't find .opd data\n");
279		return -1;
280	}
281
282	lte->arch.opd_data = elf_rawdata(sec, NULL);
283	if (lte->arch.opd_data == NULL)
284		goto fail;
285
286	lte->arch.opd_base = shdr.sh_addr + lte->bias;
287	lte->arch.opd_size = shdr.sh_size;
288
289	return 0;
290}
291
292void *
293sym2addr(struct Process *proc, struct library_symbol *sym)
294{
295	return sym->enter_addr;
296}
297
298static GElf_Addr
299get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
300{
301	Elf_Scn *ppcgot_sec = NULL;
302	GElf_Shdr ppcgot_shdr;
303	if (ppcgot != 0
304	    && elf_get_section_covering(lte, ppcgot,
305					&ppcgot_sec, &ppcgot_shdr) < 0)
306		fprintf(stderr,
307			"DT_PPC_GOT=%#"PRIx64", but no such section found\n",
308			ppcgot);
309
310	if (ppcgot_sec != NULL) {
311		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
312		if (data == NULL || data->d_size < 8 ) {
313			fprintf(stderr, "couldn't read GOT data\n");
314		} else {
315			// where PPCGOT begins in .got
316			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
317			assert(offset % 4 == 0);
318			uint32_t glink_vma;
319			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
320				fprintf(stderr, "couldn't read glink VMA"
321					" address at %zd@GOT\n", offset);
322				return 0;
323			}
324			if (glink_vma != 0) {
325				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
326				      glink_vma);
327				return (GElf_Addr)glink_vma;
328			}
329		}
330	}
331
332	if (plt_data != NULL) {
333		uint32_t glink_vma;
334		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
335			fprintf(stderr, "couldn't read glink VMA address\n");
336			return 0;
337		}
338		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
339		return (GElf_Addr)glink_vma;
340	}
341
342	return 0;
343}
344
345static int
346load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
347{
348	Elf_Scn *scn;
349	GElf_Shdr shdr;
350	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
351	    || scn == NULL) {
352	fail:
353		fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
354			elf_errmsg(-1));
355		return -1;
356	}
357
358	Elf_Data *data = elf_loaddata(scn, &shdr);
359	if (data == NULL)
360		goto fail;
361
362	size_t j;
363	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
364		GElf_Dyn dyn;
365		if (gelf_getdyn(data, j, &dyn) == NULL)
366			goto fail;
367
368		if(dyn.d_tag == tag) {
369			*valuep = dyn.d_un.d_ptr;
370			return 0;
371		}
372	}
373
374	return -1;
375}
376
377static int
378nonzero_data(Elf_Data *data)
379{
380	/* We are not supposed to get here if there's no PLT.  */
381	assert(data != NULL);
382
383	unsigned char *buf = data->d_buf;
384	if (buf == NULL)
385		return 0;
386
387	size_t i;
388	for (i = 0; i < data->d_size; ++i)
389		if (buf[i] != 0)
390			return 1;
391	return 0;
392}
393
394int
395arch_elf_init(struct ltelf *lte, struct library *lib)
396{
397	if (lte->ehdr.e_machine == EM_PPC64
398	    && load_opd_data(lte, lib) < 0)
399		return -1;
400
401	lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);
402
403	/* For PPC32 BSS, it is important whether the binary was
404	 * prelinked.  If .plt section is NODATA, or if it contains
405	 * zeroes, then this library is not prelinked, and we need to
406	 * delay breakpoints.  */
407	if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt)
408		lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data);
409	else
410		/* For cases where it's irrelevant, initialize the
411		 * value to something conspicuous.  */
412		lib->arch.bss_plt_prelinked = -1;
413
414	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
415		GElf_Addr ppcgot;
416		if (load_dynamic_entry(lte, DT_PPC_GOT, &ppcgot) < 0) {
417			fprintf(stderr, "couldn't find DT_PPC_GOT\n");
418			return -1;
419		}
420		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
421
422		assert(lte->relplt_size % 12 == 0);
423		size_t count = lte->relplt_size / 12; // size of RELA entry
424		lte->arch.plt_stub_vma = glink_vma
425			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
426		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
427
428	} else if (lte->ehdr.e_machine == EM_PPC64) {
429		GElf_Addr glink_vma;
430		if (load_dynamic_entry(lte, DT_PPC64_GLINK, &glink_vma) < 0) {
431			fprintf(stderr, "couldn't find DT_PPC64_GLINK\n");
432			return -1;
433		}
434
435		/* The first glink stub starts at offset 32.  */
436		lte->arch.plt_stub_vma = glink_vma + 32;
437	}
438
439	/* On PPC64, look for stub symbols in symbol table.  These are
440	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
441	if (lte->ehdr.e_machine == EM_PPC64
442	    && lte->symtab != NULL && lte->strtab != NULL) {
443
444		/* N.B. We can't simply skip the symbols that we fail
445		 * to read or malloc.  There may be more than one stub
446		 * per symbol name, and if we failed in one but
447		 * succeeded in another, the PLT enabling code would
448		 * have no way to tell that something is missing.  We
449		 * could work around that, of course, but it doesn't
450		 * seem worth the trouble.  So if anything fails, we
451		 * just pretend that we don't have stub symbols at
452		 * all, as if the binary is stripped.  */
453
454		size_t i;
455		for (i = 0; i < lte->symtab_count; ++i) {
456			GElf_Sym sym;
457			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
458				struct library_symbol *sym, *next;
459			fail:
460				for (sym = lte->arch.stubs; sym != NULL; ) {
461					next = sym->next;
462					library_symbol_destroy(sym);
463					free(sym);
464					sym = next;
465				}
466				lte->arch.stubs = NULL;
467				break;
468			}
469
470			const char *name = lte->strtab + sym.st_name;
471
472#define STUBN ".plt_call."
473			if ((name = strstr(name, STUBN)) == NULL)
474				continue;
475			name += sizeof(STUBN) - 1;
476#undef STUBN
477
478			size_t len;
479			const char *ver = strchr(name, '@');
480			if (ver != NULL) {
481				len = ver - name;
482
483			} else {
484				/* If there is "+" at all, check that
485				 * the symbol name ends in "+0".  */
486				const char *add = strrchr(name, '+');
487				if (add != NULL) {
488					assert(strcmp(add, "+0") == 0);
489					len = add - name;
490				} else {
491					len = strlen(name);
492				}
493			}
494
495			char *sym_name = strndup(name, len);
496			struct library_symbol *libsym = malloc(sizeof(*libsym));
497			if (sym_name == NULL || libsym == NULL) {
498			fail2:
499				free(sym_name);
500				free(libsym);
501				goto fail;
502			}
503
504			/* XXX The double cast should be removed when
505			 * arch_addr_t becomes integral type.  */
506			arch_addr_t addr = (arch_addr_t)
507				(uintptr_t)sym.st_value + lte->bias;
508			if (library_symbol_init(libsym, addr, sym_name, 1,
509						LS_TOPLT_EXEC) < 0)
510				goto fail2;
511			libsym->arch.type = PPC64_PLT_STUB;
512			libsym->next = lte->arch.stubs;
513			lte->arch.stubs = libsym;
514		}
515	}
516
517	return 0;
518}
519
520static int
521read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp)
522{
523	/* On PPC64, we read from .plt, which contains 8 byte
524	 * addresses.  On PPC32 we read from .plt, which contains 4
525	 * byte instructions, but the PLT is two instructions, and
526	 * either can change.  */
527	uint64_t l;
528	/* XXX double cast.  */
529	if (read_target_8(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) {
530		fprintf(stderr, "ptrace .plt slot value @%#" PRIx64": %s\n",
531			addr, strerror(errno));
532		return -1;
533	}
534
535	*valp = (GElf_Addr)l;
536	return 0;
537}
538
539static int
540unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value)
541{
542	/* We only modify plt_entry[0], which holds the resolved
543	 * address of the routine.  We keep the TOC and environment
544	 * pointers intact.  Hence the only adjustment that we need to
545	 * do is to IP.  */
546	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
547		fprintf(stderr, "failed to unresolve .plt slot: %s\n",
548			strerror(errno));
549		return -1;
550	}
551	return 0;
552}
553
554enum plt_status
555arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
556		       const char *a_name, GElf_Rela *rela, size_t ndx,
557		       struct library_symbol **ret)
558{
559	if (lte->ehdr.e_machine == EM_PPC) {
560		if (lte->arch.secure_plt)
561			return plt_default;
562
563		struct library_symbol *libsym = NULL;
564		if (default_elf_add_plt_entry(proc, lte, a_name, rela, ndx,
565					      &libsym) < 0)
566			return plt_fail;
567
568		/* On PPC32 with BSS PLT, delay the symbol until
569		 * dynamic linker is done.  */
570		assert(!libsym->delayed);
571		libsym->delayed = 1;
572
573		*ret = libsym;
574		return plt_ok;
575	}
576
577	/* PPC64.  If we have stubs, we return a chain of breakpoint
578	 * sites, one for each stub that corresponds to this PLT
579	 * entry.  */
580	struct library_symbol *chain = NULL;
581	struct library_symbol **symp;
582	for (symp = &lte->arch.stubs; *symp != NULL; ) {
583		struct library_symbol *sym = *symp;
584		if (strcmp(sym->name, a_name) != 0) {
585			symp = &(*symp)->next;
586			continue;
587		}
588
589		/* Re-chain the symbol from stubs to CHAIN.  */
590		*symp = sym->next;
591		sym->next = chain;
592		chain = sym;
593	}
594
595	if (chain != NULL) {
596		*ret = chain;
597		return plt_ok;
598	}
599
600	/* We don't have stub symbols.  Find corresponding .plt slot,
601	 * and check whether it contains the corresponding PLT address
602	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
603	 * want read this from ELF file, but from process image.  That
604	 * makes a difference if we are attaching to a running
605	 * process.  */
606
607	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
608	GElf_Addr plt_slot_addr = rela->r_offset;
609	assert(plt_slot_addr >= lte->plt_addr
610	       || plt_slot_addr < lte->plt_addr + lte->plt_size);
611
612	GElf_Addr plt_slot_value;
613	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
614		return plt_fail;
615
616	char *name = strdup(a_name);
617	struct library_symbol *libsym = malloc(sizeof(*libsym));
618	if (name == NULL || libsym == NULL) {
619		fprintf(stderr, "allocation for .plt slot: %s\n",
620			strerror(errno));
621	fail:
622		free(name);
623		free(libsym);
624		return plt_fail;
625	}
626
627	/* XXX The double cast should be removed when
628	 * arch_addr_t becomes integral type.  */
629	if (library_symbol_init(libsym,
630				(arch_addr_t)(uintptr_t)plt_entry_addr,
631				name, 1, LS_TOPLT_EXEC) < 0)
632		goto fail;
633	libsym->arch.plt_slot_addr = plt_slot_addr;
634
635	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
636		libsym->arch.type = PPC_PLT_UNRESOLVED;
637		libsym->arch.resolved_value = plt_entry_addr;
638
639	} else {
640		/* Unresolve the .plt slot.  If the binary was
641		 * prelinked, this makes the code invalid, because in
642		 * case of prelinked binary, the dynamic linker
643		 * doesn't update .plt[0] and .plt[1] with addresses
644		 * of the resover.  But we don't care, we will never
645		 * need to enter the resolver.  That just means that
646		 * we have to un-un-resolve this back before we
647		 * detach.  */
648
649		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) {
650			library_symbol_destroy(libsym);
651			goto fail;
652		}
653		mark_as_resolved(libsym, plt_slot_value);
654	}
655
656	*ret = libsym;
657	return plt_ok;
658}
659
660void
661arch_elf_destroy(struct ltelf *lte)
662{
663	struct library_symbol *sym;
664	for (sym = lte->arch.stubs; sym != NULL; ) {
665		struct library_symbol *next = sym->next;
666		library_symbol_destroy(sym);
667		free(sym);
668		sym = next;
669	}
670}
671
672static void
673dl_plt_update_bp_on_hit(struct breakpoint *bp, struct Process *proc)
674{
675	debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)",
676	      proc->pid, breakpoint_name(bp), bp->addr);
677	struct process_stopping_handler *self = proc->arch.handler;
678	assert(self != NULL);
679
680	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
681	GElf_Addr value;
682	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
683		return;
684
685	/* On PPC64, we rewrite the slot value.  */
686	if (proc->e_machine == EM_PPC64)
687		unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
688				   libsym->arch.resolved_value);
689	/* We mark the breakpoint as resolved on both arches.  */
690	mark_as_resolved(libsym, value);
691
692	/* cb_on_all_stopped looks if HANDLER is set to NULL as a way
693	 * to check that this was run.  It's an error if it
694	 * wasn't.  */
695	proc->arch.handler = NULL;
696
697	breakpoint_turn_off(bp, proc);
698}
699
700static void
701cb_on_all_stopped(struct process_stopping_handler *self)
702{
703	/* Put that in for dl_plt_update_bp_on_hit to see.  */
704	assert(self->task_enabling_breakpoint->arch.handler == NULL);
705	self->task_enabling_breakpoint->arch.handler = self;
706
707	linux_ptrace_disable_and_continue(self);
708}
709
710static enum callback_status
711cb_keep_stepping_p(struct process_stopping_handler *self)
712{
713	struct Process *proc = self->task_enabling_breakpoint;
714	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
715
716	GElf_Addr value;
717	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
718		return CBS_FAIL;
719
720	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
721	 * the PLT entry value.  */
722	if (value == libsym->arch.resolved_value)
723		return CBS_CONT;
724
725	debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64,
726	      proc->pid, value);
727
728	/* The .plt slot got resolved!  We can migrate the breakpoint
729	 * to RESOLVED and stop single-stepping.  */
730	if (proc->e_machine == EM_PPC64
731	    && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
732				  libsym->arch.resolved_value) < 0)
733		return CBS_FAIL;
734
735	/* Resolving on PPC64 consists of overwriting a doubleword in
736	 * .plt.  That doubleword is than read back by a stub, and
737	 * jumped on.  Hopefully we can assume that double word update
738	 * is done on a single place only, as it contains a final
739	 * address.  We still need to look around for any sync
740	 * instruction, but essentially it is safe to optimize away
741	 * the single stepping next time and install a post-update
742	 * breakpoint.
743	 *
744	 * The situation on PPC32 BSS is more complicated.  The
745	 * dynamic linker here updates potentially several
746	 * instructions (XXX currently we assume two) and the rules
747	 * are more complicated.  Sometimes it's enough to adjust just
748	 * one of the addresses--the logic for generating optimal
749	 * dispatch depends on relative addresses of the .plt entry
750	 * and the jump destination.  We can't assume that the some
751	 * instruction block does the update every time.  So on PPC32,
752	 * we turn the optimization off and just step through it each
753	 * time.  */
754	if (proc->e_machine == EM_PPC)
755		goto done;
756
757	/* Install breakpoint to the address where the change takes
758	 * place.  If we fail, then that just means that we'll have to
759	 * singlestep the next time around as well.  */
760	struct Process *leader = proc->leader;
761	if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
762		goto done;
763
764	/* We need to install to the next instruction.  ADDR points to
765	 * a store instruction, so moving the breakpoint one
766	 * instruction forward is safe.  */
767	arch_addr_t addr = get_instruction_pointer(proc) + 4;
768	leader->arch.dl_plt_update_bp = insert_breakpoint(proc, addr, NULL);
769	if (leader->arch.dl_plt_update_bp == NULL)
770		goto done;
771
772	static struct bp_callbacks dl_plt_update_cbs = {
773		.on_hit = dl_plt_update_bp_on_hit,
774	};
775	leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;
776
777	/* Turn it off for now.  We will turn it on again when we hit
778	 * the PLT entry that needs this.  */
779	breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);
780
781done:
782	mark_as_resolved(libsym, value);
783
784	return CBS_STOP;
785}
786
787static void
788jump_to_entry_point(struct Process *proc, struct breakpoint *bp)
789{
790	/* XXX The double cast should be removed when
791	 * arch_addr_t becomes integral type.  */
792	arch_addr_t rv = (arch_addr_t)
793		(uintptr_t)bp->libsym->arch.resolved_value;
794	set_instruction_pointer(proc, rv);
795}
796
797static void
798ppc_plt_bp_continue(struct breakpoint *bp, struct Process *proc)
799{
800	switch (bp->libsym->arch.type) {
801		struct Process *leader;
802		void (*on_all_stopped)(struct process_stopping_handler *);
803		enum callback_status (*keep_stepping_p)
804			(struct process_stopping_handler *);
805
806	case PPC_DEFAULT:
807		assert(proc->e_machine == EM_PPC);
808		assert(bp->libsym != NULL);
809		assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
810		/* Fall through.  */
811
812	case PPC_PLT_UNRESOLVED:
813		on_all_stopped = NULL;
814		keep_stepping_p = NULL;
815		leader = proc->leader;
816
817		if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
818		    && breakpoint_turn_on(leader->arch.dl_plt_update_bp,
819					  proc) >= 0)
820			on_all_stopped = cb_on_all_stopped;
821		else
822			keep_stepping_p = cb_keep_stepping_p;
823
824		if (process_install_stopping_handler
825		    (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
826			fprintf(stderr,	"ppc_plt_bp_continue: "
827				"couldn't install event handler\n");
828			continue_after_breakpoint(proc, bp);
829		}
830		return;
831
832	case PPC_PLT_RESOLVED:
833		if (proc->e_machine == EM_PPC) {
834			continue_after_breakpoint(proc, bp);
835			return;
836		}
837
838		jump_to_entry_point(proc, bp);
839		continue_process(proc->pid);
840		return;
841
842	case PPC64_PLT_STUB:
843		/* These should never hit here.  */
844		break;
845	}
846
847	assert(bp->libsym->arch.type != bp->libsym->arch.type);
848	abort();
849}
850
851/* When a process is in a PLT stub, it may have already read the data
852 * in .plt that we changed.  If we detach now, it will jump to PLT
853 * entry and continue to the dynamic linker, where it will SIGSEGV,
854 * because zeroth .plt slot is not filled in prelinked binaries, and
855 * the dynamic linker needs that data.  Moreover, the process may
856 * actually have hit the breakpoint already.  This functions tries to
857 * detect both cases and do any fix-ups necessary to mend this
858 * situation.  */
859static enum callback_status
860detach_task_cb(struct Process *task, void *data)
861{
862	struct breakpoint *bp = data;
863
864	if (get_instruction_pointer(task) == bp->addr) {
865		debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
866		      task->pid, bp->addr);
867		jump_to_entry_point(task, bp);
868		return CBS_CONT;
869	}
870
871	/* XXX There's still a window of several instructions where we
872	 * might catch the task inside a stub such that it has already
873	 * read destination address from .plt, but hasn't jumped yet,
874	 * thus avoiding the breakpoint.  */
875
876	return CBS_CONT;
877}
878
879static void
880ppc_plt_bp_retract(struct breakpoint *bp, struct Process *proc)
881{
882	/* On PPC64, we rewrite .plt with PLT entry addresses.  This
883	 * needs to be undone.  Unfortunately, the program may have
884	 * made decisions based on that value */
885	if (proc->e_machine == EM_PPC64
886	    && bp->libsym != NULL
887	    && bp->libsym->arch.type == PPC_PLT_RESOLVED) {
888		each_task(proc->leader, NULL, detach_task_cb, bp);
889		unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
890				   bp->libsym->arch.resolved_value);
891	}
892}
893
/* Library initialization hook.  This backend does nothing here.  */
void
arch_library_init(struct library *lib)
{
	(void)lib;
}
898
/* Library destruction hook.  This backend does nothing here.  */
void
arch_library_destroy(struct library *lib)
{
	(void)lib;
}
903
/* Library cloning hook.  This backend does nothing here; neither
 * RETP nor LIB is touched.  */
void
arch_library_clone(struct library *retp, struct library *lib)
{
	(void)retp;
	(void)lib;
}
908
909int
910arch_library_symbol_init(struct library_symbol *libsym)
911{
912	/* We set type explicitly in the code above, where we have the
913	 * necessary context.  This is for calls from ltrace-elf.c and
914	 * such.  */
915	libsym->arch.type = PPC_DEFAULT;
916	return 0;
917}
918
/* Library symbol destruction hook.  This backend does nothing
 * here.  */
void
arch_library_symbol_destroy(struct library_symbol *libsym)
{
	(void)libsym;
}
923
924int
925arch_library_symbol_clone(struct library_symbol *retp,
926			  struct library_symbol *libsym)
927{
928	retp->arch = libsym->arch;
929	return 0;
930}
931
932/* For some symbol types, we need to set up custom callbacks.  XXX we
933 * don't need PROC here, we can store the data in BP if it is of
934 * interest to us.  */
935int
936arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
937{
938	/* Artificial and entry-point breakpoints are plain.  */
939	if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
940		return 0;
941
942	/* On PPC, secure PLT and prelinked BSS PLT are plain.  */
943	if (proc->e_machine == EM_PPC
944	    && bp->libsym->lib->arch.bss_plt_prelinked != 0)
945		return 0;
946
947	/* On PPC64, stub PLT breakpoints are plain.  */
948	if (proc->e_machine == EM_PPC64
949	    && bp->libsym->arch.type == PPC64_PLT_STUB)
950		return 0;
951
952	static struct bp_callbacks cbs = {
953		.on_continue = ppc_plt_bp_continue,
954		.on_retract = ppc_plt_bp_retract,
955	};
956	breakpoint_set_callbacks(bp, &cbs);
957	return 0;
958}
959
/* Breakpoint destruction hook.  This backend does nothing here.  */
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
	(void)bp;
}
964
965int
966arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
967{
968	retp->arch = sbp->arch;
969	return 0;
970}
971
972int
973arch_process_init(struct Process *proc)
974{
975	proc->arch.dl_plt_update_bp = NULL;
976	proc->arch.handler = NULL;
977	return 0;
978}
979
/* Process destruction hook.  This backend does nothing here.  */
void
arch_process_destroy(struct Process *proc)
{
	(void)proc;
}
984
985int
986arch_process_clone(struct Process *retp, struct Process *proc)
987{
988	retp->arch = proc->arch;
989	return 0;
990}
991
/* Hook called when PROC execs.  The arch-specific state is reset the
 * same way as for a new process; returns what arch_process_init
 * returns.  */
int
arch_process_exec(struct Process *proc)
{
	int status = arch_process_init(proc);
	return status;
}
997