ltrace-elf.c revision 673ff510953b65b844a58478aa434120f457c014
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2010 Zachary T Welch, CodeSourcery
5 * Copyright (C) 2010 Joe Damato
6 * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
7 * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
8 * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
9 * Copyright (C) 2006 Paul Gilliam, IBM Corporation
10 * Copyright (C) 2006 Ian Wienand
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 * 02110-1301 USA
26 */
27
28#include "config.h"
29
30#include <assert.h>
31#ifdef	__linux__
32#include <endian.h>
33#endif
34#include <errno.h>
35#include <fcntl.h>
36#include <gelf.h>
37#include <inttypes.h>
38#include <search.h>
39#include <stdint.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <strings.h>
44#include <unistd.h>
45
46#include "backend.h"
47#include "filter.h"
48#include "library.h"
49#include "ltrace-elf.h"
50#include "proc.h"
51#include "debug.h"
52#include "options.h"
53
54#ifndef ARCH_HAVE_LTELF_DATA
/* Fallback used when the backend does not define
 * ARCH_HAVE_LTELF_DATA: there is no arch-specific ELF state to set
 * up, so success is reported unconditionally.  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
	(void)lte;
	(void)lib;
	return 0;
}
60
/* Counterpart of the fallback arch_elf_init: nothing was set up, so
 * there is nothing to release.  */
void
arch_elf_destroy(struct ltelf *lte)
{
	(void)lte;
}
65#endif
66
67int
68default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
69			  const char *a_name, GElf_Rela *rela, size_t ndx,
70			  struct library_symbol **ret)
71{
72	char *name = strdup(a_name);
73	if (name == NULL) {
74	fail_message:
75		fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
76			strerror(errno));
77	fail:
78		free(name);
79		return -1;
80	}
81
82	GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
83
84	struct library_symbol *libsym = malloc(sizeof(*libsym));
85	if (libsym == NULL)
86		goto fail_message;
87
88	/* XXX The double cast should be removed when
89	 * arch_addr_t becomes integral type.  */
90	arch_addr_t taddr = (arch_addr_t)
91		(uintptr_t)(addr + lte->bias);
92
93	if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
94		free(libsym);
95		goto fail;
96	}
97
98	libsym->next = *ret;
99	*ret = libsym;
100	return 0;
101}
102
103#ifndef ARCH_HAVE_ADD_PLT_ENTRY
104enum plt_status
105arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
106		       const char *a_name, GElf_Rela *rela, size_t ndx,
107		       struct library_symbol **ret)
108{
109	return PLT_DEFAULT;
110}
111#endif
112
113#ifndef OS_HAVE_ADD_FUNC_ENTRY
114enum plt_status
115os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
116		      const GElf_Sym *sym,
117		      arch_addr_t addr, const char *name,
118		      struct library_symbol **ret)
119{
120	if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) {
121		*ret = NULL;
122		return PLT_OK;
123	} else {
124		return PLT_DEFAULT;
125	}
126}
127#endif
128
129Elf_Data *
130elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
131{
132	Elf_Data *data = elf_getdata(scn, NULL);
133	if (data == NULL || elf_getdata(scn, data) != NULL
134	    || data->d_off || data->d_size != shdr->sh_size)
135		return NULL;
136	return data;
137}
138
139static int
140elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
141		   int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
142		   void *data)
143{
144	int i;
145	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
146		Elf_Scn *scn;
147		GElf_Shdr shdr;
148
149		scn = elf_getscn(lte->elf, i);
150		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
151			debug(1, "Couldn't read section or header.");
152			return -1;
153		}
154		if (predicate(scn, &shdr, data)) {
155			*tgt_sec = scn;
156			*tgt_shdr = shdr;
157			return 0;
158		}
159	}
160
161	*tgt_sec = NULL;
162	return 0;
163}
164
165static int
166inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
167{
168	GElf_Addr addr = *(GElf_Addr *)data;
169	return addr >= shdr->sh_addr
170		&& addr < shdr->sh_addr + shdr->sh_size;
171}
172
173int
174elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
175			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
176{
177	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
178				  &inside_p, &addr);
179}
180
181static int
182type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
183{
184	GElf_Word type = *(GElf_Word *)data;
185	return shdr->sh_type == type;
186}
187
188int
189elf_get_section_type(struct ltelf *lte, GElf_Word type,
190		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
191{
192	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
193				  &type_p, &type);
194}
195
196struct section_named_data {
197	struct ltelf *lte;
198	const char *name;
199};
200
201static int
202name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
203{
204	struct section_named_data *data = d;
205	const char *name = elf_strptr(data->lte->elf,
206				      data->lte->ehdr.e_shstrndx,
207				      shdr->sh_name);
208	return strcmp(name, data->name) == 0;
209}
210
211int
212elf_get_section_named(struct ltelf *lte, const char *name,
213		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
214{
215	struct section_named_data data = {
216		.lte = lte,
217		.name = name,
218	};
219	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
220				  &name_p, &data);
221}
222
223static struct elf_each_symbol_t
224each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
225	       unsigned i,
226	       enum callback_status (*cb)(GElf_Sym *symbol,
227					  const char *name, void *data),
228	       void *data)
229{
230	for (; i < count; ++i) {
231		GElf_Sym sym;
232		if (gelf_getsym(symtab, i, &sym) == NULL)
233			return (struct elf_each_symbol_t){ i, -2 };
234
235		switch (cb(&sym, strtab + sym.st_name, data)) {
236		case CBS_FAIL:
237			return (struct elf_each_symbol_t){ i, -1 };
238		case CBS_STOP:
239			return (struct elf_each_symbol_t){ i + 1, 0 };
240		case CBS_CONT:
241			break;
242		}
243	}
244
245	return (struct elf_each_symbol_t){ 0, 0 };
246}
247
248/* N.B.: gelf_getsym takes integer argument.  Since negative values
249 * are invalid as indices, we can use the extra bit to encode which
250 * symbol table we are looking into.  ltrace currently doesn't handle
251 * more than two symbol tables anyway, nor does it handle the xindex
252 * stuff.  */
253struct elf_each_symbol_t
254elf_each_symbol(struct ltelf *lte, unsigned start_after,
255		enum callback_status (*cb)(GElf_Sym *symbol,
256					   const char *name, void *data),
257		void *data)
258{
259	unsigned index = start_after == 0 ? 0 : start_after >> 1;
260
261	/* Go through static symbol table first.  */
262	if ((start_after & 0x1) == 0) {
263		struct elf_each_symbol_t st
264			= each_symbol_in(lte->symtab, lte->strtab,
265					 lte->symtab_count, index, cb, data);
266
267		/* If the iteration stopped prematurely, bail out.  */
268		if (st.restart != 0)
269			return ((struct elf_each_symbol_t)
270				{ st.restart << 1, st.status });
271	}
272
273	struct elf_each_symbol_t st
274		= each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
275				 index, cb, data);
276	if (st.restart != 0)
277		return ((struct elf_each_symbol_t)
278			{ st.restart << 1 | 0x1, st.status });
279
280	return (struct elf_each_symbol_t){ 0, 0 };
281}
282
283int
284elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
285{
286	assert(data != NULL);
287	if (data->d_size < size || offset > data->d_size - size) {
288		debug(1, "Not enough data to read %"PRId64"-byte value"
289		      " at offset %"PRId64".", size, offset);
290		return 0;
291	}
292	return 1;
293}
294
295#define DEF_READER(NAME, SIZE)						\
296	int								\
297	NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)	\
298	{								\
299		if (!elf_can_read_next(data, offset, SIZE / 8))		\
300			return -1;					\
301									\
302		if (data->d_buf == NULL) /* NODATA section */ {		\
303			*retp = 0;					\
304			return 0;					\
305		}							\
306									\
307		union {							\
308			uint##SIZE##_t dst;				\
309			char buf[0];					\
310		} u;							\
311		memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));	\
312		*retp = u.dst;						\
313		return 0;						\
314	}
315
316DEF_READER(elf_read_u8, 8)
317DEF_READER(elf_read_u16, 16)
318DEF_READER(elf_read_u32, 32)
319DEF_READER(elf_read_u64, 64)
320
321#undef DEF_READER
322
323#define DEF_READER(NAME, SIZE)						\
324	int								\
325	NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)	\
326	{								\
327		int rc = elf_read_u##SIZE(data, *offset, retp);		\
328		if (rc < 0)						\
329			return rc;					\
330		*offset += SIZE / 8;					\
331		return 0;						\
332	}
333
334DEF_READER(elf_read_next_u8, 8)
335DEF_READER(elf_read_next_u16, 16)
336DEF_READER(elf_read_next_u32, 32)
337DEF_READER(elf_read_next_u64, 64)
338
339#undef DEF_READER
340
341int
342elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
343{
344	uint64_t result = 0;
345	int shift = 0;
346	int size = 8 * sizeof result;
347
348	while (1) {
349		uint8_t byte;
350		if (elf_read_next_u8(data, offset, &byte) < 0)
351			return -1;
352
353		uint8_t payload = byte & 0x7f;
354		result |= (uint64_t)payload << shift;
355		shift += 7;
356		if (shift > size && byte != 0x1)
357			return -1;
358		if ((byte & 0x80) == 0)
359			break;
360	}
361
362	if (retp != NULL)
363		*retp = result;
364	return 0;
365}
366
367int
368elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
369{
370	return elf_read_next_uleb128(data, &offset, retp);
371}
372
373int
374ltelf_init(struct ltelf *lte, const char *filename)
375{
376	memset(lte, 0, sizeof *lte);
377	lte->fd = open(filename, O_RDONLY);
378	if (lte->fd == -1)
379		return 1;
380
381	elf_version(EV_CURRENT);
382
383#ifdef HAVE_ELF_C_READ_MMAP
384	lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
385#else
386	lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
387#endif
388
389	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
390		fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
391		exit(EXIT_FAILURE);
392	}
393
394	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
395		fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
396			filename, elf_errmsg(-1));
397		exit(EXIT_FAILURE);
398	}
399
400	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
401		fprintf(stderr, "\"%s\" is neither an ELF executable"
402			" nor a shared library\n", filename);
403		exit(EXIT_FAILURE);
404	}
405
406	if (1
407#ifdef LT_ELF_MACHINE
408	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
409		|| lte->ehdr.e_machine != LT_ELF_MACHINE)
410#endif
411#ifdef LT_ELF_MACHINE2
412	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
413		|| lte->ehdr.e_machine != LT_ELF_MACHINE2)
414#endif
415#ifdef LT_ELF_MACHINE3
416	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
417		|| lte->ehdr.e_machine != LT_ELF_MACHINE3)
418#endif
419		) {
420		fprintf(stderr,
421			"\"%s\" is ELF from incompatible architecture\n",
422			filename);
423		exit(EXIT_FAILURE);
424	}
425
426	VECT_INIT(&lte->plt_relocs, GElf_Rela);
427
428	return 0;
429}
430
431void
432ltelf_destroy(struct ltelf *lte)
433{
434	debug(DEBUG_FUNCTION, "close_elf()");
435	elf_end(lte->elf);
436	close(lte->fd);
437	VECT_DESTROY(&lte->plt_relocs, GElf_Rela, NULL, NULL);
438}
439
440static void
441read_symbol_table(struct ltelf *lte, const char *filename,
442		  Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
443		  Elf_Data **datap, size_t *countp, const char **strsp)
444{
445	*datap = elf_getdata(scn, NULL);
446	*countp = shdr->sh_size / shdr->sh_entsize;
447	if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
448	    && options.static_filter != NULL) {
449		fprintf(stderr, "Couldn't get data of section"
450			" %s from \"%s\": %s\n",
451			name, filename, elf_errmsg(-1));
452		exit(EXIT_FAILURE);
453	}
454
455	scn = elf_getscn(lte->elf, shdr->sh_link);
456	GElf_Shdr shdr2;
457	if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
458		fprintf(stderr, "Couldn't get header of section"
459			" #%d from \"%s\": %s\n",
460			shdr->sh_link, filename, elf_errmsg(-1));
461		exit(EXIT_FAILURE);
462	}
463
464	Elf_Data *data = elf_getdata(scn, NULL);
465	if (data == NULL || elf_getdata(scn, data) != NULL
466	    || shdr2.sh_size != data->d_size || data->d_off) {
467		fprintf(stderr, "Couldn't get data of section"
468			" #%d from \"%s\": %s\n",
469			shdr2.sh_link, filename, elf_errmsg(-1));
470		exit(EXIT_FAILURE);
471	}
472
473	*strsp = data->d_buf;
474}
475
476static int
477rel_to_rela(struct ltelf *lte, const GElf_Rel *rel, GElf_Rela *rela)
478{
479	rela->r_offset = rel->r_offset;
480	rela->r_info = rel->r_info;
481
482	Elf_Scn *sec;
483	GElf_Shdr shdr;
484	if (elf_get_section_covering(lte, rel->r_offset, &sec, &shdr) < 0
485	    || sec == NULL)
486		return -1;
487
488	Elf_Data *data = elf_loaddata(sec, &shdr);
489	if (data == NULL)
490		return -1;
491
492	GElf_Xword offset = rel->r_offset - shdr.sh_addr - data->d_off;
493	uint64_t value;
494	if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
495		uint32_t tmp;
496		if (elf_read_u32(data, offset, &tmp) < 0)
497			return -1;
498		value = tmp;
499	} else if (elf_read_u64(data, offset, &value) < 0) {
500		return -1;
501	}
502
503	rela->r_addend = value;
504	return 0;
505}
506
507int
508read_relplt(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr,
509	    struct vect *rela_vec)
510{
511	if (vect_reserve_additional(rela_vec, lte->ehdr.e_shnum) < 0)
512		return -1;
513
514	Elf_Data *relplt = elf_loaddata(scn, shdr);
515	if (relplt == NULL) {
516		fprintf(stderr, "Couldn't load .rel*.plt data.\n");
517		return -1;
518	}
519
520	if ((shdr->sh_size % shdr->sh_entsize) != 0) {
521		fprintf(stderr, ".rel*.plt size (%" PRIx64 "d) not a multiple "
522			"of its sh_entsize (%" PRIx64 "d).\n",
523			shdr->sh_size, shdr->sh_entsize);
524		return -1;
525	}
526
527	GElf_Xword relplt_count = shdr->sh_size / shdr->sh_entsize;
528	GElf_Xword i;
529	for (i = 0; i < relplt_count; ++i) {
530		GElf_Rela rela;
531		if (relplt->d_type == ELF_T_REL) {
532			GElf_Rel rel;
533			if (gelf_getrel(relplt, i, &rel) == NULL
534			    || rel_to_rela(lte, &rel, &rela) < 0)
535				return -1;
536
537		} else if (gelf_getrela(relplt, i, &rela) == NULL) {
538			return -1;
539		}
540
541		if (VECT_PUSHBACK(rela_vec, &rela) < 0)
542			return -1;
543	}
544
545	return 0;
546}
547
548static int
549ltelf_read_elf(struct ltelf *lte, const char *filename)
550{
551	int i;
552	GElf_Addr relplt_addr = 0;
553	GElf_Addr soname_offset = 0;
554	GElf_Xword relplt_size = 0;
555
556	debug(DEBUG_FUNCTION, "ltelf_read_elf(filename=%s)", filename);
557	debug(1, "Reading ELF from %s...", filename);
558
559	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
560		Elf_Scn *scn;
561		GElf_Shdr shdr;
562		const char *name;
563
564		scn = elf_getscn(lte->elf, i);
565		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
566			fprintf(stderr,	"Couldn't get section #%d from"
567				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
568			exit(EXIT_FAILURE);
569		}
570
571		name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
572		if (name == NULL) {
573			fprintf(stderr,	"Couldn't get name of section #%d from"
574				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
575			exit(EXIT_FAILURE);
576		}
577
578		if (shdr.sh_type == SHT_SYMTAB) {
579			read_symbol_table(lte, filename,
580					  scn, &shdr, name, &lte->symtab,
581					  &lte->symtab_count, &lte->strtab);
582
583		} else if (shdr.sh_type == SHT_DYNSYM) {
584			read_symbol_table(lte, filename,
585					  scn, &shdr, name, &lte->dynsym,
586					  &lte->dynsym_count, &lte->dynstr);
587
588		} else if (shdr.sh_type == SHT_DYNAMIC) {
589			Elf_Data *data;
590			size_t j;
591
592			lte->dyn_addr = shdr.sh_addr + lte->bias;
593			lte->dyn_sz = shdr.sh_size;
594
595			data = elf_getdata(scn, NULL);
596			if (data == NULL || elf_getdata(scn, data) != NULL) {
597				fprintf(stderr, "Couldn't get .dynamic data"
598					" from \"%s\": %s\n",
599					filename, strerror(errno));
600				exit(EXIT_FAILURE);
601			}
602
603			for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
604				GElf_Dyn dyn;
605
606				if (gelf_getdyn(data, j, &dyn) == NULL) {
607					fprintf(stderr, "Couldn't get .dynamic"
608						" data from \"%s\": %s\n",
609						filename, strerror(errno));
610					exit(EXIT_FAILURE);
611				}
612				if (dyn.d_tag == DT_JMPREL)
613					relplt_addr = dyn.d_un.d_ptr;
614				else if (dyn.d_tag == DT_PLTRELSZ)
615					relplt_size = dyn.d_un.d_val;
616				else if (dyn.d_tag == DT_SONAME)
617					soname_offset = dyn.d_un.d_val;
618			}
619		} else if (shdr.sh_type == SHT_PROGBITS
620			   || shdr.sh_type == SHT_NOBITS) {
621			if (strcmp(name, ".plt") == 0) {
622				lte->plt_addr = shdr.sh_addr;
623				lte->plt_size = shdr.sh_size;
624				lte->plt_data = elf_loaddata(scn, &shdr);
625				if (lte->plt_data == NULL)
626					fprintf(stderr,
627						"Can't load .plt data\n");
628				lte->plt_flags = shdr.sh_flags;
629			}
630#ifdef ARCH_SUPPORTS_OPD
631			else if (strcmp(name, ".opd") == 0) {
632				lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
633				lte->opd_size = shdr.sh_size;
634				lte->opd = elf_rawdata(scn, NULL);
635			}
636#endif
637		}
638	}
639
640	if (lte->dynsym == NULL || lte->dynstr == NULL) {
641		fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
642			filename);
643		exit(EXIT_FAILURE);
644	}
645
646	if (!relplt_addr || !lte->plt_addr) {
647		debug(1, "%s has no PLT relocations", filename);
648	} else if (relplt_size == 0) {
649		debug(1, "%s has unknown PLT size", filename);
650	} else {
651		for (i = 1; i < lte->ehdr.e_shnum; ++i) {
652			Elf_Scn *scn;
653			GElf_Shdr shdr;
654
655			scn = elf_getscn(lte->elf, i);
656			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
657				fprintf(stderr, "Couldn't get section header"
658					" from \"%s\": %s\n",
659					filename, elf_errmsg(-1));
660				exit(EXIT_FAILURE);
661			}
662			if (shdr.sh_addr == relplt_addr
663			    && shdr.sh_size == relplt_size) {
664				if (read_relplt(lte, scn, &shdr,
665						&lte->plt_relocs) < 0) {
666					fprintf(stderr, "Couldn't get .rel*.plt"
667						" data from \"%s\": %s\n",
668						filename, elf_errmsg(-1));
669					exit(EXIT_FAILURE);
670				}
671				break;
672			}
673		}
674
675		if (i == lte->ehdr.e_shnum) {
676			fprintf(stderr,
677				"Couldn't find .rel*.plt section in \"%s\"\n",
678				filename);
679			exit(EXIT_FAILURE);
680		}
681	}
682	debug(1, "%s %zd PLT relocations", filename,
683	      vect_size(&lte->plt_relocs));
684
685	if (soname_offset != 0)
686		lte->soname = lte->dynstr + soname_offset;
687
688	return 0;
689}
690
691#ifndef ARCH_HAVE_GET_SYMINFO
692int
693arch_get_sym_info(struct ltelf *lte, const char *filename,
694		  size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
695{
696	return gelf_getsym(lte->dynsym,
697			   ELF64_R_SYM(rela->r_info), sym) != NULL ? 0 : -1;
698}
699#endif
700
701static void
702mark_chain_latent(struct library_symbol *libsym)
703{
704	for (; libsym != NULL; libsym = libsym->next) {
705		debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
706		libsym->latent = 1;
707	}
708}
709
710static void
711filter_symbol_chain(struct filter *filter,
712		    struct library_symbol **libsymp, struct library *lib)
713{
714	assert(libsymp != NULL);
715	struct library_symbol **ptr = libsymp;
716	while (*ptr != NULL) {
717		if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
718			ptr = &(*ptr)->next;
719		} else {
720			struct library_symbol *sym = *ptr;
721			*ptr = (*ptr)->next;
722			library_symbol_destroy(sym);
723			free(sym);
724		}
725	}
726}
727
728static void
729delete_symbol_chain(struct library_symbol *libsym)
730{
731	while (libsym != NULL) {
732		struct library_symbol *tmp = libsym->next;
733		library_symbol_destroy(libsym);
734		free(libsym);
735		libsym = tmp;
736	}
737}
738
739static int
740populate_plt(struct process *proc, const char *filename,
741	     struct ltelf *lte, struct library *lib,
742	     int latent_plts)
743{
744	size_t count = vect_size(&lte->plt_relocs);
745	size_t i;
746	for (i = 0; i < count; ++i) {
747		GElf_Rela *rela = VECT_ELEMENT(&lte->plt_relocs, GElf_Rela, i);
748		GElf_Sym sym;
749
750		switch (arch_get_sym_info(lte, filename, i, rela, &sym)) {
751		default:
752			fprintf(stderr,
753				"Couldn't get relocation for symbol #%zd"
754				" from \"%s\": %s\n",
755				i, filename, elf_errmsg(-1));
756			/* Fall through.  */
757		case 1:
758			continue; /* Skip this entry.  */
759		case 0:
760			break;
761		}
762
763		char const *name = lte->dynstr + sym.st_name;
764
765		int matched = filter_matches_symbol(options.plt_filter,
766						    name, lib);
767
768		struct library_symbol *libsym = NULL;
769		switch (arch_elf_add_plt_entry(proc, lte, name,
770					       rela, i, &libsym)) {
771		case PLT_FAIL:
772			return -1;
773
774		case PLT_DEFAULT:
775			/* Add default entry to the beginning of LIBSYM.  */
776			if (default_elf_add_plt_entry(proc, lte, name,
777						      rela, i, &libsym) < 0)
778				return -1;
779			/* Fall through.  */
780		case PLT_OK:
781			/* If we didn't match the PLT entry up there,
782			 * filter the chain to only include the
783			 * matching symbols (but include all if we are
784			 * adding latent symbols).  This is to allow
785			 * arch_elf_add_plt_entry to override the PLT
786			 * symbol's name.  */
787			if (!matched && !latent_plts)
788				filter_symbol_chain(options.plt_filter,
789						    &libsym, lib);
790			if (libsym != NULL) {
791				/* If we are adding those symbols just
792				 * for tracing exports, mark them all
793				 * latent.  */
794				if (!matched && latent_plts)
795					mark_chain_latent(libsym);
796				library_add_symbol(lib, libsym);
797			}
798		}
799	}
800	return 0;
801}
802
803/* When -x rules result in request to trace several aliases, we only
804 * want to add such symbol once.  The only way that those symbols
805 * differ in is their name, e.g. in glibc you have __GI___libc_free,
806 * __cfree, __free, __libc_free, cfree and free all defined on the
807 * same address.  So instead we keep this unique symbol struct for
808 * each address, and replace name in libsym with a shorter variant if
809 * we find it.  */
810struct unique_symbol {
811	arch_addr_t addr;
812	struct library_symbol *libsym;
813};
814
815static int
816unique_symbol_cmp(const void *key, const void *val)
817{
818	const struct unique_symbol *sym_key = key;
819	const struct unique_symbol *sym_val = val;
820	return sym_key->addr != sym_val->addr;
821}
822
823static enum callback_status
824symbol_with_address(struct library_symbol *sym, void *addrptr)
825{
826	return sym->enter_addr == *(arch_addr_t *)addrptr
827		? CBS_STOP : CBS_CONT;
828}
829
830static int
831populate_this_symtab(struct process *proc, const char *filename,
832		     struct ltelf *lte, struct library *lib,
833		     Elf_Data *symtab, const char *strtab, size_t count,
834		     struct library_exported_name **names)
835{
836	/* If a valid NAMES is passed, we pass in *NAMES a list of
837	 * symbol names that this library exports.  */
838	if (names != NULL)
839		*names = NULL;
840
841	/* Using sorted array would be arguably better, but this
842	 * should be well enough for the number of symbols that we
843	 * typically deal with.  */
844	size_t num_symbols = 0;
845	struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
846	if (symbols == NULL) {
847		fprintf(stderr, "couldn't insert symbols for -x: %s\n",
848			strerror(errno));
849		return -1;
850	}
851
852	GElf_Word secflags[lte->ehdr.e_shnum];
853	size_t i;
854	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
855		Elf_Scn *scn = elf_getscn(lte->elf, i);
856		GElf_Shdr shdr;
857		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
858			secflags[i] = 0;
859		else
860			secflags[i] = shdr.sh_flags;
861	}
862
863	for (i = 0; i < count; ++i) {
864		GElf_Sym sym;
865		if (gelf_getsym(symtab, i, &sym) == NULL) {
866			fprintf(stderr,
867				"couldn't get symbol #%zd from %s: %s\n",
868				i, filename, elf_errmsg(-1));
869			continue;
870		}
871
872		if (sym.st_value == 0 || sym.st_shndx == STN_UNDEF
873		    /* Also ignore any special values besides direct
874		     * section references.  */
875		    || sym.st_shndx >= lte->ehdr.e_shnum)
876			continue;
877
878		/* Find symbol name and snip version.  */
879		const char *orig_name = strtab + sym.st_name;
880		const char *version = strchr(orig_name, '@');
881		size_t len = version != NULL ? (assert(version > orig_name),
882						(size_t)(version - orig_name))
883			: strlen(orig_name);
884		char name[len + 1];
885		memcpy(name, orig_name, len);
886		name[len] = 0;
887
888		/* If we are interested in exports, store this name.  */
889		if (names != NULL) {
890			struct library_exported_name *export
891				= malloc(sizeof *export);
892			char *name_copy = strdup(name);
893
894			if (name_copy == NULL || export == NULL) {
895				free(name_copy);
896				free(export);
897				fprintf(stderr, "Couldn't store symbol %s.  "
898					"Tracing may be incomplete.\n", name);
899			} else {
900				export->name = name_copy;
901				export->own_name = 1;
902				export->next = *names;
903				*names = export;
904			}
905		}
906
907		/* If the symbol is not matched, skip it.  We already
908		 * stored it to export list above.  */
909		if (!filter_matches_symbol(options.static_filter, name, lib))
910			continue;
911
912		arch_addr_t addr = (arch_addr_t)
913			(uintptr_t)(sym.st_value + lte->bias);
914		arch_addr_t naddr;
915
916		/* On arches that support OPD, the value of typical
917		 * function symbol will be a pointer to .opd, but some
918		 * will point directly to .text.  We don't want to
919		 * translate those.  */
920		if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
921			naddr = addr;
922		} else if (arch_translate_address(lte, addr, &naddr) < 0) {
923			fprintf(stderr,
924				"couldn't translate address of %s@%s: %s\n",
925				name, lib->soname, strerror(errno));
926			continue;
927		}
928
929		char *full_name = strdup(name);
930		if (full_name == NULL) {
931			fprintf(stderr, "couldn't copy name of %s@%s: %s\n",
932				name, lib->soname, strerror(errno));
933			continue;
934		}
935
936		struct library_symbol *libsym = NULL;
937		switch (os_elf_add_func_entry(proc, lte, &sym,
938					      naddr, full_name, &libsym)) {
939		case PLT_DEFAULT:;
940			/* Put the default symbol to the chain.  */
941			struct library_symbol *tmp = malloc(sizeof *tmp);
942			if (tmp == NULL
943			    || library_symbol_init(tmp, naddr, full_name, 1,
944						   LS_TOPLT_NONE) < 0) {
945				free(tmp);
946
947				/* Either add the whole bunch, or none
948				 * of it.  Note that for PLT_FAIL we
949				 * don't do this--it's the callee's
950				 * job to clean up after itself before
951				 * it bails out.  */
952				delete_symbol_chain(libsym);
953				libsym = NULL;
954
955		case PLT_FAIL:
956				fprintf(stderr, "Couldn't add symbol %s@%s "
957					"for tracing.\n", name, lib->soname);
958
959				break;
960			}
961
962			full_name = NULL;
963			tmp->next = libsym;
964			libsym = tmp;
965			break;
966
967		case PLT_OK:
968			break;
969		}
970
971		free(full_name);
972
973		struct library_symbol *tmp;
974		for (tmp = libsym; tmp != NULL; ) {
975			/* Look whether we already have a symbol for
976			 * this address.  If not, add this one.  If
977			 * yes, look if we should pick the new symbol
978			 * name.  */
979
980			struct unique_symbol key = { tmp->enter_addr, NULL };
981			struct unique_symbol *unique
982				= lsearch(&key, symbols, &num_symbols,
983					  sizeof *symbols, &unique_symbol_cmp);
984
985			if (unique->libsym == NULL) {
986				unique->libsym = tmp;
987				unique->addr = tmp->enter_addr;
988				tmp = tmp->next;
989			} else {
990				if (strlen(tmp->name)
991				    < strlen(unique->libsym->name)) {
992					library_symbol_set_name
993						(unique->libsym, tmp->name, 1);
994					tmp->name = NULL;
995				}
996				struct library_symbol *next = tmp->next;
997				library_symbol_destroy(tmp);
998				free(tmp);
999				tmp = next;
1000			}
1001		}
1002	}
1003
1004	/* Now we do the union of this set of unique symbols with
1005	 * what's already in the library.  */
1006	for (i = 0; i < num_symbols; ++i) {
1007		struct library_symbol *this_sym = symbols[i].libsym;
1008		assert(this_sym != NULL);
1009		struct library_symbol *other
1010			= library_each_symbol(lib, NULL, symbol_with_address,
1011					      &this_sym->enter_addr);
1012		if (other != NULL) {
1013			library_symbol_destroy(this_sym);
1014			free(this_sym);
1015			symbols[i].libsym = NULL;
1016		}
1017	}
1018
1019	for (i = 0; i < num_symbols; ++i)
1020		if (symbols[i].libsym != NULL)
1021			library_add_symbol(lib, symbols[i].libsym);
1022
1023	free(symbols);
1024	return 0;
1025}
1026
1027static int
1028populate_symtab(struct process *proc, const char *filename,
1029		struct ltelf *lte, struct library *lib,
1030		int symtabs, int exports)
1031{
1032	int status;
1033	if (symtabs && lte->symtab != NULL && lte->strtab != NULL
1034	    && (status = populate_this_symtab(proc, filename, lte, lib,
1035					      lte->symtab, lte->strtab,
1036					      lte->symtab_count, NULL)) < 0)
1037		return status;
1038
1039	/* Check whether we want to trace symbols implemented by this
1040	 * library (-l).  */
1041	struct library_exported_name **names = NULL;
1042	if (exports) {
1043		debug(DEBUG_FUNCTION, "-l matches %s", lib->soname);
1044		names = &lib->exported_names;
1045	}
1046
1047	return populate_this_symtab(proc, filename, lte, lib,
1048				    lte->dynsym, lte->dynstr,
1049				    lte->dynsym_count, names);
1050}
1051
1052static int
1053read_module(struct library *lib, struct process *proc,
1054	    const char *filename, GElf_Addr bias, int main)
1055{
1056	struct ltelf lte;
1057	if (ltelf_init(&lte, filename) < 0)
1058		return -1;
1059
1060	/* XXX When we abstract ABI into a module, this should instead
1061	 * become something like
1062	 *
1063	 *    proc->abi = arch_get_abi(lte.ehdr);
1064	 *
1065	 * The code in ltelf_init needs to be replaced by this logic.
1066	 * Be warned that libltrace.c calls ltelf_init as well to
1067	 * determine whether ABI is supported.  This is to get
1068	 * reasonable error messages when trying to run 64-bit binary
1069	 * with 32-bit ltrace.  It is desirable to preserve this.  */
1070	proc->e_machine = lte.ehdr.e_machine;
1071	proc->e_class = lte.ehdr.e_ident[EI_CLASS];
1072	get_arch_dep(proc);
1073
1074	/* Find out the base address.  For PIE main binaries we look
1075	 * into auxv, otherwise we scan phdrs.  */
1076	if (main && lte.ehdr.e_type == ET_DYN) {
1077		arch_addr_t entry;
1078		if (process_get_entry(proc, &entry, NULL) < 0) {
1079			fprintf(stderr, "Couldn't find entry of PIE %s\n",
1080				filename);
1081		fail:
1082			ltelf_destroy(&lte);
1083			return -1;
1084		}
1085		/* XXX The double cast should be removed when
1086		 * arch_addr_t becomes integral type.  */
1087		lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
1088		lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1089
1090	} else {
1091		GElf_Phdr phdr;
1092		size_t i;
1093		for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1094			if (phdr.p_type == PT_LOAD) {
1095				lte.base_addr = phdr.p_vaddr + bias;
1096				break;
1097			}
1098		}
1099
1100		lte.bias = bias;
1101		lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1102
1103		if (lte.base_addr == 0) {
1104			fprintf(stderr,
1105				"Couldn't determine base address of %s\n",
1106				filename);
1107			goto fail;
1108		}
1109	}
1110
1111	if (ltelf_read_elf(&lte, filename) < 0)
1112		goto fail;
1113
1114	if (arch_elf_init(&lte, lib) < 0) {
1115		fprintf(stderr, "Backend initialization failed.\n");
1116		goto fail;
1117	}
1118
1119	if (lib == NULL)
1120		goto fail;
1121
1122	/* Note that we set soname and pathname as soon as they are
1123	 * allocated, so in case of further errors, this get released
1124	 * when LIB is released, which should happen in the caller
1125	 * when we return error.  */
1126
1127	if (lib->pathname == NULL) {
1128		char *pathname = strdup(filename);
1129		if (pathname == NULL)
1130			goto fail;
1131		library_set_pathname(lib, pathname, 1);
1132	}
1133
1134	if (lte.soname != NULL) {
1135		char *soname = strdup(lte.soname);
1136		if (soname == NULL)
1137			goto fail;
1138		library_set_soname(lib, soname, 1);
1139	} else {
1140		const char *soname = rindex(lib->pathname, '/');
1141		if (soname != NULL)
1142			soname += 1;
1143		else
1144			soname = lib->pathname;
1145		library_set_soname(lib, soname, 0);
1146	}
1147
1148	/* XXX The double cast should be removed when
1149	 * arch_addr_t becomes integral type.  */
1150	arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1151	if (arch_translate_address(&lte, entry, &entry) < 0)
1152		goto fail;
1153
1154	/* XXX The double cast should be removed when
1155	 * arch_addr_t becomes integral type.  */
1156	lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1157	lib->entry = entry;
1158	/* XXX The double cast should be removed when
1159	 * arch_addr_t becomes integral type.  */
1160	lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1161
1162	/* There are two reasons that we need to inspect symbol tables
1163	 * or populate PLT entries.  Either the user requested
1164	 * corresponding tracing features (respectively -x and -e), or
1165	 * they requested tracing exported symbols (-l).
1166	 *
1167	 * In the latter case we need to keep even those PLT slots
1168	 * that are not requested by -e (but we keep them latent).  We
1169	 * also need to inspect .dynsym to find what exports this
1170	 * library provide, to turn on existing latent PLT
1171	 * entries.  */
1172
1173	int plts = filter_matches_library(options.plt_filter, lib);
1174	if ((plts || options.export_filter != NULL)
1175	    && populate_plt(proc, filename, &lte, lib,
1176			    options.export_filter != NULL) < 0)
1177		goto fail;
1178
1179	int exports = filter_matches_library(options.export_filter, lib);
1180	int symtabs = filter_matches_library(options.static_filter, lib);
1181	if ((symtabs || exports)
1182	    && populate_symtab(proc, filename, &lte, lib,
1183			       symtabs, exports) < 0)
1184		goto fail;
1185
1186	arch_elf_destroy(&lte);
1187	ltelf_destroy(&lte);
1188	return 0;
1189}
1190
1191int
1192ltelf_read_library(struct library *lib, struct process *proc,
1193		   const char *filename, GElf_Addr bias)
1194{
1195	return read_module(lib, proc, filename, bias, 0);
1196}
1197
1198
1199struct library *
1200ltelf_read_main_binary(struct process *proc, const char *path)
1201{
1202	struct library *lib = malloc(sizeof(*lib));
1203	if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1204		free(lib);
1205		return NULL;
1206	}
1207	library_set_pathname(lib, path, 0);
1208
1209	/* There is a race between running the process and reading its
1210	 * binary for internal consumption.  So open the binary from
1211	 * the /proc filesystem.  XXX Note that there is similar race
1212	 * for libraries, but there we don't have a nice answer like
1213	 * that.  Presumably we could read the DSOs from the process
1214	 * memory image, but that's not currently done.  */
1215	char *fname = pid2name(proc->pid);
1216	if (fname == NULL
1217	    || read_module(lib, proc, fname, 0, 1) < 0) {
1218		library_destroy(lib);
1219		free(lib);
1220		lib = NULL;
1221	}
1222
1223	free(fname);
1224	return lib;
1225}
1226