ltrace-elf.c revision fc6ff18285b81c7c3f248be3667e8c74c2910cd8
1#include "config.h"
2
3#include <endian.h>
4#include <errno.h>
5#include <error.h>
6#include <fcntl.h>
7#include <gelf.h>
8#include <inttypes.h>
9#include <stdint.h>
10#include <stdlib.h>
11#include <string.h>
12#include <unistd.h>
13#include <assert.h>
14
15#include "common.h"
16#include "proc.h"
17#include "library.h"
18#include "filter.h"
19
/* Only declared when the build defines PLT_REINITALISATION_BP.  The
 * variable is merely declared here (extern).  */
#if defined(PLT_REINITALISATION_BP)
extern char *PLTs_initialized_by_here;
#endif
23
/* Fallback for system headers that do not provide DT_PPC_GOT: it is
 * the first tag of the processor-specific range (DT_LOPROC + 0).  */
#if !defined(DT_PPC_GOT)
# define DT_PPC_GOT		(DT_LOPROC + 0)
#endif
27
28
29#ifndef ARCH_HAVE_LTELF_DATA
/* Default backend initialisation hook, compiled in only when the
 * architecture does not define ARCH_HAVE_LTELF_DATA.  It has nothing
 * to set up and always reports success (0).  */
int
arch_elf_init(struct ltelf *lte)
{
	(void)lte;	/* No backend-specific state to prepare.  */
	return 0;
}
35
/* Default backend teardown hook, the counterpart of the default
 * arch_elf_init.  There is no backend state, so it does nothing.  */
void
arch_elf_destroy(struct ltelf *lte)
{
	(void)lte;	/* Nothing to release.  */
}
40#endif
41
42int
43default_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
44			  const char *a_name, GElf_Rela *rela, size_t ndx,
45			  struct library_symbol **ret)
46{
47	char *name = strdup(a_name);
48	if (name == NULL) {
49	fail:
50		free(name);
51		return -1;
52	}
53
54	enum toplt pltt = PLTS_ARE_EXECUTABLE(lte)
55		?  LS_TOPLT_EXEC : LS_TOPLT_POINT;
56	GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
57
58	struct library_symbol *libsym = malloc(sizeof(*libsym));
59	if (libsym == NULL)
60		goto fail;
61
62	target_address_t taddr = (target_address_t)(addr + lte->bias);
63
64	/* The logic behind this conditional translation is as
65	 * follows.  PLT entries do not typically need custom TOC
66	 * pointer, and therefore aren't redirected via OPD.  POINT
67	 * PLT, on the other hand, most likely contains addresses of
68	 * target functions, not PLT entries themselves, and would
69	 * need the OPD redirection.  */
70	if (pltt == LS_TOPLT_POINT
71	    && arch_translate_address(proc, taddr, &taddr) < 0) {
72		free(libsym);
73		goto fail;
74	}
75
76	library_symbol_init(libsym, taddr, name, 1, pltt);
77	*ret = libsym;
78	return 0;
79}
80
81#ifndef ARCH_HAVE_ADD_PLT_ENTRY
82enum plt_status
83arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
84		       const char *a_name, GElf_Rela *rela, size_t ndx,
85		       struct library_symbol **ret)
86{
87	return plt_default;
88}
89#endif
90
91Elf_Data *
92elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
93{
94	Elf_Data *data = elf_getdata(scn, NULL);
95	if (data == NULL || elf_getdata(scn, data) != NULL
96	    || data->d_off || data->d_size != shdr->sh_size)
97		return NULL;
98	return data;
99}
100
101static int
102elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
103		   int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
104		   void *data)
105{
106	int i;
107	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
108		Elf_Scn *scn;
109		GElf_Shdr shdr;
110
111		scn = elf_getscn(lte->elf, i);
112		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
113			debug(1, "Couldn't read section or header.");
114			return -1;
115		}
116		if (predicate(scn, &shdr, data)) {
117			*tgt_sec = scn;
118			*tgt_shdr = shdr;
119			return 0;
120		}
121	}
122	return -1;
123
124}
125
126static int
127inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
128{
129	GElf_Addr addr = *(GElf_Addr *)data;
130	return addr >= shdr->sh_addr
131		&& addr < shdr->sh_addr + shdr->sh_size;
132}
133
134int
135elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
136			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
137{
138	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
139				  &inside_p, &addr);
140}
141
142static int
143type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
144{
145	GElf_Word type = *(GElf_Word *)data;
146	return shdr->sh_type == type;
147}
148
149int
150elf_get_section_type(struct ltelf *lte, GElf_Word type,
151		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
152{
153	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
154				  &type_p, &type);
155}
156
157static int
158need_data(Elf_Data *data, size_t offset, size_t size)
159{
160	assert(data != NULL);
161	if (data->d_size < size || offset > data->d_size - size) {
162		debug(1, "Not enough data to read %zd-byte value"
163		      " at offset %zd.", size, offset);
164		return -1;
165	}
166	return 0;
167}
168
169#define DEF_READER(NAME, SIZE)						\
170	int								\
171	NAME(Elf_Data *data, size_t offset, uint##SIZE##_t *retp)	\
172	{								\
173		if (!need_data(data, offset, SIZE / 8) < 0)		\
174			return -1;					\
175									\
176		if (data->d_buf == NULL) /* NODATA section */ {		\
177			*retp = 0;					\
178			return 0;					\
179		}							\
180									\
181		union {							\
182			uint##SIZE##_t dst;				\
183			char buf[0];					\
184		} u;							\
185		memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));	\
186		*retp = u.dst;						\
187		return 0;						\
188	}
189
190DEF_READER(elf_read_u16, 16)
191DEF_READER(elf_read_u32, 32)
192DEF_READER(elf_read_u64, 64)
193
194#undef DEF_READER
195
196int
197open_elf(struct ltelf *lte, const char *filename)
198{
199	lte->fd = open(filename, O_RDONLY);
200	if (lte->fd == -1)
201		return 1;
202
203	elf_version(EV_CURRENT);
204
205#ifdef HAVE_ELF_C_READ_MMAP
206	lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
207#else
208	lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
209#endif
210
211	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF)
212		error(EXIT_FAILURE, 0, "Can't open ELF file \"%s\"", filename);
213
214	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL)
215		error(EXIT_FAILURE, 0, "Can't read ELF header of \"%s\"",
216		      filename);
217
218	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN)
219		error(EXIT_FAILURE, 0,
220		      "\"%s\" is not an ELF executable nor shared library",
221		      filename);
222
223	if ((lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
224	     || lte->ehdr.e_machine != LT_ELF_MACHINE)
225#ifdef LT_ELF_MACHINE2
226	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
227		|| lte->ehdr.e_machine != LT_ELF_MACHINE2)
228#endif
229#ifdef LT_ELF_MACHINE3
230	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
231		|| lte->ehdr.e_machine != LT_ELF_MACHINE3)
232#endif
233	    )
234		error(EXIT_FAILURE, 0,
235		      "\"%s\" is ELF from incompatible architecture", filename);
236
237	return 0;
238}
239
240static int
241do_init_elf(struct ltelf *lte, const char *filename, GElf_Addr bias)
242{
243	int i;
244	GElf_Addr relplt_addr = 0;
245	GElf_Addr soname_offset = 0;
246
247	debug(DEBUG_FUNCTION, "do_init_elf(filename=%s)", filename);
248	debug(1, "Reading ELF from %s...", filename);
249
250	if (open_elf(lte, filename) < 0)
251		return -1;
252
253	/* Find out the base address.  */
254	{
255		GElf_Phdr phdr;
256		for (i = 0; gelf_getphdr (lte->elf, i, &phdr) != NULL; ++i) {
257			if (phdr.p_type == PT_LOAD) {
258				lte->base_addr = phdr.p_vaddr + bias;
259				fprintf(stderr,
260					" + vaddr=%#lx, bias=%#lx, base=%#lx\n",
261					phdr.p_vaddr, bias, lte->base_addr);
262				break;
263			}
264		}
265	}
266
267	if (lte->base_addr == 0) {
268		fprintf(stderr, "Couldn't determine base address of %s\n",
269			filename);
270		return -1;
271	}
272
273	lte->bias = bias;
274	lte->entry_addr = lte->ehdr.e_entry + lte->bias;
275
276	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
277		Elf_Scn *scn;
278		GElf_Shdr shdr;
279		const char *name;
280
281		scn = elf_getscn(lte->elf, i);
282		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
283			error(EXIT_FAILURE, 0,
284			      "Couldn't get section header from \"%s\"",
285			      filename);
286
287		name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
288		if (name == NULL)
289			error(EXIT_FAILURE, 0,
290			      "Couldn't get section header from \"%s\"",
291			      filename);
292
293		if (shdr.sh_type == SHT_SYMTAB) {
294			Elf_Data *data;
295
296			lte->symtab = elf_getdata(scn, NULL);
297			lte->symtab_count = shdr.sh_size / shdr.sh_entsize;
298			if ((lte->symtab == NULL
299			     || elf_getdata(scn, lte->symtab) != NULL)
300			    && options.static_filter != NULL)
301				error(EXIT_FAILURE, 0,
302				      "Couldn't get .symtab data from \"%s\"",
303				      filename);
304
305			scn = elf_getscn(lte->elf, shdr.sh_link);
306			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
307				error(EXIT_FAILURE, 0,
308				      "Couldn't get section header from \"%s\"",
309				      filename);
310
311			data = elf_getdata(scn, NULL);
312			if (data == NULL || elf_getdata(scn, data) != NULL
313			    || shdr.sh_size != data->d_size || data->d_off)
314				error(EXIT_FAILURE, 0,
315				      "Couldn't get .strtab data from \"%s\"",
316				      filename);
317
318			lte->strtab = data->d_buf;
319		} else if (shdr.sh_type == SHT_DYNSYM) {
320			Elf_Data *data;
321
322			lte->dynsym = elf_getdata(scn, NULL);
323			lte->dynsym_count = shdr.sh_size / shdr.sh_entsize;
324			if (lte->dynsym == NULL
325			    || elf_getdata(scn, lte->dynsym) != NULL)
326				error(EXIT_FAILURE, 0,
327				      "Couldn't get .dynsym data from \"%s\"",
328				      filename);
329
330			scn = elf_getscn(lte->elf, shdr.sh_link);
331			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
332				error(EXIT_FAILURE, 0,
333				      "Couldn't get section header from \"%s\"",
334				      filename);
335
336			data = elf_getdata(scn, NULL);
337			if (data == NULL || elf_getdata(scn, data) != NULL
338			    || shdr.sh_size != data->d_size || data->d_off)
339				error(EXIT_FAILURE, 0,
340				      "Couldn't get .dynstr data from \"%s\"",
341				      filename);
342
343			lte->dynstr = data->d_buf;
344		} else if (shdr.sh_type == SHT_DYNAMIC) {
345			Elf_Data *data;
346			size_t j;
347
348			lte->dyn_addr = shdr.sh_addr;
349			fprintf(stderr, "dyn_addr = %#lx\n", lte->dyn_addr);
350			lte->dyn_sz = shdr.sh_size;
351
352			data = elf_getdata(scn, NULL);
353			if (data == NULL || elf_getdata(scn, data) != NULL)
354				error(EXIT_FAILURE, 0,
355				      "Couldn't get .dynamic data from \"%s\"",
356				      filename);
357
358			for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
359				GElf_Dyn dyn;
360
361				if (gelf_getdyn(data, j, &dyn) == NULL)
362					error(EXIT_FAILURE, 0,
363					      "Couldn't get .dynamic data from \"%s\"",
364					      filename);
365				if (dyn.d_tag == DT_JMPREL)
366					relplt_addr = dyn.d_un.d_ptr;
367				else if (dyn.d_tag == DT_PLTRELSZ)
368					lte->relplt_size = dyn.d_un.d_val;
369				else if (dyn.d_tag == DT_SONAME)
370					soname_offset = dyn.d_un.d_val;
371			}
372		} else if (shdr.sh_type == SHT_PROGBITS
373			   || shdr.sh_type == SHT_NOBITS) {
374			if (strcmp(name, ".plt") == 0) {
375				lte->plt_addr = shdr.sh_addr;
376				lte->plt_size = shdr.sh_size;
377				lte->plt_data = elf_loaddata(scn, &shdr);
378				if (lte->plt_data == NULL)
379					fprintf(stderr,
380						"Can't load .plt data\n");
381				if (shdr.sh_flags & SHF_EXECINSTR)
382					lte->lte_flags |= LTE_PLT_EXECUTABLE;
383			}
384#ifdef ARCH_SUPPORTS_OPD
385			else if (strcmp(name, ".opd") == 0) {
386				lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
387				lte->opd_size = shdr.sh_size;
388				lte->opd = elf_rawdata(scn, NULL);
389			}
390#endif
391		}
392	}
393
394	if (lte->dynsym == NULL || lte->dynstr == NULL)
395		error(EXIT_FAILURE, 0,
396		      "Couldn't find .dynsym or .dynstr in \"%s\"", filename);
397
398	if (!relplt_addr || !lte->plt_addr) {
399		debug(1, "%s has no PLT relocations", filename);
400		lte->relplt = NULL;
401		lte->relplt_count = 0;
402	} else if (lte->relplt_size == 0) {
403		debug(1, "%s has unknown PLT size", filename);
404		lte->relplt = NULL;
405		lte->relplt_count = 0;
406	} else {
407
408		for (i = 1; i < lte->ehdr.e_shnum; ++i) {
409			Elf_Scn *scn;
410			GElf_Shdr shdr;
411
412			scn = elf_getscn(lte->elf, i);
413			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
414				error(EXIT_FAILURE, 0,
415				      "Couldn't get section header from \"%s\"",
416				      filename);
417			if (shdr.sh_addr == relplt_addr
418			    && shdr.sh_size == lte->relplt_size) {
419				lte->relplt = elf_getdata(scn, NULL);
420				lte->relplt_count =
421				    shdr.sh_size / shdr.sh_entsize;
422				if (lte->relplt == NULL
423				    || elf_getdata(scn, lte->relplt) != NULL)
424					error(EXIT_FAILURE, 0,
425					      "Couldn't get .rel*.plt data from \"%s\"",
426					      filename);
427				break;
428			}
429		}
430
431		if (i == lte->ehdr.e_shnum)
432			error(EXIT_FAILURE, 0,
433			      "Couldn't find .rel*.plt section in \"%s\"",
434			      filename);
435
436		debug(1, "%s %zd PLT relocations", filename, lte->relplt_count);
437	}
438
439	if (soname_offset != 0)
440		lte->soname = lte->dynstr + soname_offset;
441
442	if (arch_elf_init(lte) < 0) {
443		fprintf(stderr, "Backend initialization failed.\n");
444		return -1;
445	}
446
447	return 0;
448}
449
450/* XXX temporarily non-static */
451void
452do_close_elf(struct ltelf *lte) {
453	debug(DEBUG_FUNCTION, "do_close_elf()");
454	arch_elf_destroy(lte);
455	elf_end(lte->elf);
456	close(lte->fd);
457}
458
459static int
460populate_plt(struct Process *proc, const char *filename,
461	     struct ltelf *lte, struct library *lib)
462{
463	size_t i;
464	for (i = 0; i < lte->relplt_count; ++i) {
465		GElf_Rel rel;
466		GElf_Rela rela;
467		GElf_Sym sym;
468		void *ret;
469
470		if (lte->relplt->d_type == ELF_T_REL) {
471			ret = gelf_getrel(lte->relplt, i, &rel);
472			rela.r_offset = rel.r_offset;
473			rela.r_info = rel.r_info;
474			rela.r_addend = 0;
475		} else {
476			ret = gelf_getrela(lte->relplt, i, &rela);
477		}
478
479		if (ret == NULL
480		    || ELF64_R_SYM(rela.r_info) >= lte->dynsym_count
481		    || gelf_getsym(lte->dynsym, ELF64_R_SYM(rela.r_info),
482				   &sym) == NULL)
483			error(EXIT_FAILURE, 0,
484			      "Couldn't get relocation from \"%s\"",
485			      filename);
486
487		char const *name = lte->dynstr + sym.st_name;
488
489		if (!filter_matches_symbol(options.plt_filter, name, lib))
490			continue;
491
492		fprintf(stderr, "%s@%s matches\n", name, lib->soname);
493
494		struct library_symbol *libsym;
495		switch (arch_elf_add_plt_entry(proc, lte, name,
496					       &rela, i, &libsym)) {
497		case plt_default:
498			if (default_elf_add_plt_entry(proc, lte, name,
499						      &rela, i, &libsym) < 0)
500		case plt_fail:
501				return -1;
502		case plt_ok:
503			if (libsym != NULL)
504				library_add_symbol(lib, libsym);
505		}
506	}
507	return 0;
508}
509
510static int
511populate_this_symtab(struct Process *proc, const char *filename,
512		     struct ltelf *lte, struct library *lib,
513		     Elf_Data *symtab, const char *strtab, size_t size)
514{
515	size_t lib_len = strlen(lib->soname);
516	size_t i;
517	for (i = 0; i < size; ++i) {
518		GElf_Sym sym;
519		if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
520		fail:
521			error(0, errno, "couldn't get symbol #%zd from %s: %s",
522			      i, filename, elf_errmsg(-1));
523			continue;
524		}
525
526		if (sym.st_value == 0)
527			continue;
528
529		const char *name = strtab + sym.st_name;
530		if (!filter_matches_symbol(options.static_filter, name, lib))
531			continue;
532		fprintf(stderr, "%s@%s matches\n", name, lib->soname);
533
534		char *full_name = malloc(strlen(name) + 1 + lib_len + 1);
535		if (full_name == NULL)
536			goto fail;
537		sprintf(full_name, "%s@%s", name, lib->soname);
538
539		target_address_t addr
540			= (target_address_t)(sym.st_value + lte->bias);
541		target_address_t naddr;
542		if (arch_translate_address(proc, addr, &naddr) < 0) {
543			error(0, errno, "couldn't translate address of %s@%s",
544			      name, lib->soname);
545			continue;
546		}
547		if (addr != naddr)
548			naddr += lte->bias;
549
550		struct library_symbol *libsym = malloc(sizeof(*libsym));
551		if (libsym == NULL)
552			goto fail;
553
554		library_symbol_init(libsym, naddr, full_name, 1, LS_TOPLT_NONE);
555		library_add_symbol(lib, libsym);
556	}
557	return 0;
558}
559
560static int
561populate_symtab(struct Process *proc, const char *filename,
562		struct ltelf *lte, struct library *lib)
563{
564	if (lte->symtab != NULL && lte->strtab != NULL)
565		return populate_this_symtab(proc, filename, lte, lib,
566					    lte->symtab, lte->strtab,
567					    lte->symtab_count);
568	else
569		return populate_this_symtab(proc, filename, lte, lib,
570					    lte->dynsym, lte->dynstr,
571					    lte->dynsym_count);
572}
573
574int
575ltelf_read_library(struct library *lib, struct Process *proc,
576		   const char *filename, GElf_Addr bias)
577{
578	struct ltelf lte = {};
579	if (do_init_elf(&lte, filename, bias) < 0)
580		return -1;
581	proc->e_machine = lte.ehdr.e_machine;
582
583	int status = 0;
584	if (lib == NULL)
585		goto fail;
586
587	/* Note that we set soname and pathname as soon as they are
588	 * allocated, so in case of further errors, this get released
589	 * when LIB is release, which should happen in the caller when
590	 * we return error.  */
591
592	if (lib->pathname == NULL) {
593		char *pathname = strdup(filename);
594		if (pathname == NULL)
595			goto fail;
596		library_set_pathname(lib, filename, 1);
597	}
598
599	if (lte.soname != NULL) {
600		char *soname = strdup(lte.soname);
601		if (soname == NULL)
602			goto fail;
603		library_set_soname(lib, soname, 1);
604	} else {
605		const char *soname = rindex(lib->pathname, '/') + 1;
606		if (soname == NULL)
607			soname = lib->pathname;
608		library_set_soname(lib, soname, 0);
609	}
610
611	target_address_t entry = (target_address_t)lte.entry_addr;
612	if (arch_translate_address(proc, entry, &entry) < 0)
613		goto fail;
614
615	lib->base = (target_address_t)lte.base_addr;
616	lib->entry = entry;
617	lib->dyn_addr = (target_address_t)lte.dyn_addr;
618
619	if (filter_matches_library(options.plt_filter, lib)
620	    && populate_plt(proc, filename, &lte, lib) < 0)
621		goto fail;
622
623	if (filter_matches_library(options.static_filter, lib)
624	    && populate_symtab(proc, filename, &lte, lib) < 0)
625		goto fail;
626
627done:
628	do_close_elf(&lte);
629	return status;
630
631fail:
632	status = -1;
633	goto done;
634}
635
636struct library *
637ltelf_read_main_binary(struct Process *proc, const char *path)
638{
639	struct library *lib = malloc(sizeof(*lib));
640	if (lib == NULL)
641		return NULL;
642	library_init(lib, LT_LIBTYPE_MAIN);
643	library_set_pathname(lib, path, 0);
644
645	fprintf(stderr, "ltelf_read_main_binary %d %s\n", proc->pid, path);
646
647	/* There is a race between running the process and reading its
648	 * binary for internal consumption.  So open the binary from
649	 * the /proc filesystem.  XXX Note that there is similar race
650	 * for libraries, but there we don't have a nice answer like
651	 * that.  Presumably we could read the DSOs from the process
652	 * memory image, but that's not currently done.  */
653	char *fname = pid2name(proc->pid);
654	if (ltelf_read_library(lib, proc, fname, 0) < 0) {
655		library_destroy(lib);
656		free(lib);
657		return NULL;
658	}
659
660	return lib;
661}
662