ltrace-elf.c revision 4f2f66e6abc7fedf3a5d04fab7cc00e5f82b37cf
/*
 * This file is part of ltrace.
 * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
 * Copyright (C) 2010 Zachary T Welch, CodeSourcery
 * Copyright (C) 2010 Joe Damato
 * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
 * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
 * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
 * Copyright (C) 2006 Paul Gilliam, IBM Corporation
 * Copyright (C) 2006 Ian Wienand
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
27
28#include "config.h"
29
30#include <assert.h>
31#ifdef	__linux__
32#include <endian.h>
33#endif
34#include <errno.h>
35#include <fcntl.h>
36#include <gelf.h>
37#include <inttypes.h>
38#include <search.h>
39#include <stdbool.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <strings.h>
45#include <unistd.h>
46
47#include "backend.h"
48#include "filter.h"
49#include "library.h"
50#include "ltrace-elf.h"
51#include "proc.h"
52#include "debug.h"
53#include "options.h"
54
#ifndef ARCH_HAVE_LTELF_DATA
/* Default definition, compiled only when ARCH_HAVE_LTELF_DATA is not
 * defined.  Neither argument is touched; the call always reports
 * success (0).  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
	const int status = 0;
	return status;
}

/* Default counterpart of arch_elf_init above: it does nothing with
 * LTE.  */
void
arch_elf_destroy(struct ltelf *lte)
{
	return;
}
#endif
67
68#ifndef OS_HAVE_ADD_PLT_ENTRY
69enum plt_status
70os_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
71		     const char *a_name, GElf_Rela *rela, size_t ndx,
72		     struct library_symbol **ret)
73{
74	return PLT_DEFAULT;
75}
76#endif
77
78#ifndef ARCH_HAVE_ADD_PLT_ENTRY
79enum plt_status
80arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
81		       const char *a_name, GElf_Rela *rela, size_t ndx,
82		       struct library_symbol **ret)
83{
84	return PLT_DEFAULT;
85}
86#endif
87
88#ifndef OS_HAVE_ADD_FUNC_ENTRY
89enum plt_status
90os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
91		      const GElf_Sym *sym,
92		      arch_addr_t addr, const char *name,
93		      struct library_symbol **ret)
94{
95	if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) {
96		*ret = NULL;
97		return PLT_OK;
98	} else {
99		return PLT_DEFAULT;
100	}
101}
102#endif
103
104#ifndef ARCH_HAVE_ADD_FUNC_ENTRY
105enum plt_status
106arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
107			const GElf_Sym *sym,
108			arch_addr_t addr, const char *name,
109			struct library_symbol **ret)
110{
111	return PLT_DEFAULT;
112}
113#endif
114
115Elf_Data *
116elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
117{
118	Elf_Data *data = elf_getdata(scn, NULL);
119	if (data == NULL || elf_getdata(scn, data) != NULL
120	    || data->d_off || data->d_size != shdr->sh_size)
121		return NULL;
122	return data;
123}
124
125static int
126elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
127		   int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
128		   void *data)
129{
130	int i;
131	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
132		Elf_Scn *scn;
133		GElf_Shdr shdr;
134
135		scn = elf_getscn(lte->elf, i);
136		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
137			debug(1, "Couldn't read section or header.");
138			return -1;
139		}
140		if (predicate(scn, &shdr, data)) {
141			*tgt_sec = scn;
142			*tgt_shdr = shdr;
143			return 0;
144		}
145	}
146
147	*tgt_sec = NULL;
148	return 0;
149}
150
151static int
152inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
153{
154	GElf_Addr addr = *(GElf_Addr *)data;
155	return addr >= shdr->sh_addr
156		&& addr < shdr->sh_addr + shdr->sh_size;
157}
158
159int
160elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
161			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
162{
163	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
164				  &inside_p, &addr);
165}
166
167static int
168type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
169{
170	GElf_Word type = *(GElf_Word *)data;
171	return shdr->sh_type == type;
172}
173
174int
175elf_get_section_type(struct ltelf *lte, GElf_Word type,
176		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
177{
178	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
179				  &type_p, &type);
180}
181
/* Argument bundle that elf_get_section_named hands to name_p through
 * the opaque DATA pointer of elf_get_section_if.  */
struct section_named_data {
	/* ELF file whose section-name string table is consulted.  */
	struct ltelf *lte;
	/* Section name that is being looked for.  */
	const char *name;
};
186
187static int
188name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
189{
190	struct section_named_data *data = d;
191	const char *name = elf_strptr(data->lte->elf,
192				      data->lte->ehdr.e_shstrndx,
193				      shdr->sh_name);
194	return strcmp(name, data->name) == 0;
195}
196
197int
198elf_get_section_named(struct ltelf *lte, const char *name,
199		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
200{
201	struct section_named_data data = {
202		.lte = lte,
203		.name = name,
204	};
205	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
206				  &name_p, &data);
207}
208
209static struct elf_each_symbol_t
210each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
211	       unsigned i,
212	       enum callback_status (*cb)(GElf_Sym *symbol,
213					  const char *name, void *data),
214	       void *data)
215{
216	for (; i < count; ++i) {
217		GElf_Sym sym;
218		if (gelf_getsym(symtab, i, &sym) == NULL)
219			return (struct elf_each_symbol_t){ i, -2 };
220
221		switch (cb(&sym, strtab + sym.st_name, data)) {
222		case CBS_FAIL:
223			return (struct elf_each_symbol_t){ i, -1 };
224		case CBS_STOP:
225			return (struct elf_each_symbol_t){ i + 1, 0 };
226		case CBS_CONT:
227			break;
228		}
229	}
230
231	return (struct elf_each_symbol_t){ 0, 0 };
232}
233
234/* N.B.: gelf_getsym takes integer argument.  Since negative values
235 * are invalid as indices, we can use the extra bit to encode which
236 * symbol table we are looking into.  ltrace currently doesn't handle
237 * more than two symbol tables anyway, nor does it handle the xindex
238 * stuff.  */
239struct elf_each_symbol_t
240elf_each_symbol(struct ltelf *lte, unsigned start_after,
241		enum callback_status (*cb)(GElf_Sym *symbol,
242					   const char *name, void *data),
243		void *data)
244{
245	unsigned index = start_after == 0 ? 0 : start_after >> 1;
246
247	/* Go through static symbol table first.  */
248	if ((start_after & 0x1) == 0) {
249		struct elf_each_symbol_t st
250			= each_symbol_in(lte->symtab, lte->strtab,
251					 lte->symtab_count, index, cb, data);
252
253		/* If the iteration stopped prematurely, bail out.  */
254		if (st.restart != 0)
255			return ((struct elf_each_symbol_t)
256				{ st.restart << 1, st.status });
257	}
258
259	struct elf_each_symbol_t st
260		= each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
261				 index, cb, data);
262	if (st.restart != 0)
263		return ((struct elf_each_symbol_t)
264			{ st.restart << 1 | 0x1, st.status });
265
266	return (struct elf_each_symbol_t){ 0, 0 };
267}
268
269int
270elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
271{
272	assert(data != NULL);
273	if (data->d_size < size || offset > data->d_size - size) {
274		debug(1, "Not enough data to read %"PRId64"-byte value"
275		      " at offset %"PRId64".", size, offset);
276		return 0;
277	}
278	return 1;
279}
280
281#define DEF_READER(NAME, SIZE)						\
282	int								\
283	NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)	\
284	{								\
285		if (!elf_can_read_next(data, offset, SIZE / 8))		\
286			return -1;					\
287									\
288		if (data->d_buf == NULL) /* NODATA section */ {		\
289			*retp = 0;					\
290			return 0;					\
291		}							\
292									\
293		union {							\
294			uint##SIZE##_t dst;				\
295			char buf[0];					\
296		} u;							\
297		memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));	\
298		*retp = u.dst;						\
299		return 0;						\
300	}
301
302DEF_READER(elf_read_u8, 8)
303DEF_READER(elf_read_u16, 16)
304DEF_READER(elf_read_u32, 32)
305DEF_READER(elf_read_u64, 64)
306
307#undef DEF_READER
308
309#define DEF_READER(NAME, SIZE)						\
310	int								\
311	NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)	\
312	{								\
313		int rc = elf_read_u##SIZE(data, *offset, retp);		\
314		if (rc < 0)						\
315			return rc;					\
316		*offset += SIZE / 8;					\
317		return 0;						\
318	}
319
320DEF_READER(elf_read_next_u8, 8)
321DEF_READER(elf_read_next_u16, 16)
322DEF_READER(elf_read_next_u32, 32)
323DEF_READER(elf_read_next_u64, 64)
324
325#undef DEF_READER
326
327int
328elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
329{
330	uint64_t result = 0;
331	int shift = 0;
332	int size = 8 * sizeof result;
333
334	while (1) {
335		uint8_t byte;
336		if (elf_read_next_u8(data, offset, &byte) < 0)
337			return -1;
338
339		uint8_t payload = byte & 0x7f;
340		result |= (uint64_t)payload << shift;
341		shift += 7;
342		if (shift > size && byte != 0x1)
343			return -1;
344		if ((byte & 0x80) == 0)
345			break;
346	}
347
348	if (retp != NULL)
349		*retp = result;
350	return 0;
351}
352
353int
354elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
355{
356	return elf_read_next_uleb128(data, &offset, retp);
357}
358
359int
360ltelf_init(struct ltelf *lte, const char *filename)
361{
362	memset(lte, 0, sizeof *lte);
363	lte->fd = open(filename, O_RDONLY);
364	if (lte->fd == -1)
365		return 1;
366
367	elf_version(EV_CURRENT);
368
369#ifdef HAVE_ELF_C_READ_MMAP
370	lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
371#else
372	lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
373#endif
374
375	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
376		fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
377		exit(EXIT_FAILURE);
378	}
379
380	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
381		fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
382			filename, elf_errmsg(-1));
383		exit(EXIT_FAILURE);
384	}
385
386	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
387		fprintf(stderr, "\"%s\" is neither an ELF executable"
388			" nor a shared library\n", filename);
389		exit(EXIT_FAILURE);
390	}
391
392	if (1
393#ifdef LT_ELF_MACHINE
394	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
395		|| lte->ehdr.e_machine != LT_ELF_MACHINE)
396#endif
397#ifdef LT_ELF_MACHINE2
398	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
399		|| lte->ehdr.e_machine != LT_ELF_MACHINE2)
400#endif
401#ifdef LT_ELF_MACHINE3
402	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
403		|| lte->ehdr.e_machine != LT_ELF_MACHINE3)
404#endif
405		) {
406		fprintf(stderr,
407			"\"%s\" is ELF from incompatible architecture\n",
408			filename);
409		exit(EXIT_FAILURE);
410	}
411
412	VECT_INIT(&lte->plt_relocs, GElf_Rela);
413
414	return 0;
415}
416
417void
418ltelf_destroy(struct ltelf *lte)
419{
420	debug(DEBUG_FUNCTION, "close_elf()");
421	elf_end(lte->elf);
422	close(lte->fd);
423	VECT_DESTROY(&lte->plt_relocs, GElf_Rela, NULL, NULL);
424}
425
426static void
427read_symbol_table(struct ltelf *lte, const char *filename,
428		  Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
429		  Elf_Data **datap, size_t *countp, const char **strsp)
430{
431	*datap = elf_getdata(scn, NULL);
432	*countp = shdr->sh_size / shdr->sh_entsize;
433	if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
434	    && options.static_filter != NULL) {
435		fprintf(stderr, "Couldn't get data of section"
436			" %s from \"%s\": %s\n",
437			name, filename, elf_errmsg(-1));
438		exit(EXIT_FAILURE);
439	}
440
441	scn = elf_getscn(lte->elf, shdr->sh_link);
442	GElf_Shdr shdr2;
443	if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
444		fprintf(stderr, "Couldn't get header of section"
445			" #%d from \"%s\": %s\n",
446			shdr->sh_link, filename, elf_errmsg(-1));
447		exit(EXIT_FAILURE);
448	}
449
450	Elf_Data *data = elf_getdata(scn, NULL);
451	if (data == NULL || elf_getdata(scn, data) != NULL
452	    || shdr2.sh_size != data->d_size || data->d_off) {
453		fprintf(stderr, "Couldn't get data of section"
454			" #%d from \"%s\": %s\n",
455			shdr2.sh_link, filename, elf_errmsg(-1));
456		exit(EXIT_FAILURE);
457	}
458
459	*strsp = data->d_buf;
460}
461
462static int
463rel_to_rela(struct ltelf *lte, const GElf_Rel *rel, GElf_Rela *rela)
464{
465	rela->r_offset = rel->r_offset;
466	rela->r_info = rel->r_info;
467
468	Elf_Scn *sec;
469	GElf_Shdr shdr;
470	if (elf_get_section_covering(lte, rel->r_offset, &sec, &shdr) < 0
471	    || sec == NULL)
472		return -1;
473
474	Elf_Data *data = elf_loaddata(sec, &shdr);
475	if (data == NULL)
476		return -1;
477
478	GElf_Xword offset = rel->r_offset - shdr.sh_addr - data->d_off;
479	uint64_t value;
480	if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
481		uint32_t tmp;
482		if (elf_read_u32(data, offset, &tmp) < 0)
483			return -1;
484		value = tmp;
485	} else if (elf_read_u64(data, offset, &value) < 0) {
486		return -1;
487	}
488
489	rela->r_addend = value;
490	return 0;
491}
492
493int
494elf_read_relocs(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr,
495		struct vect *rela_vec)
496{
497	if (vect_reserve_additional(rela_vec, lte->ehdr.e_shnum) < 0)
498		return -1;
499
500	Elf_Data *relplt = elf_loaddata(scn, shdr);
501	if (relplt == NULL) {
502		fprintf(stderr, "Couldn't load .rel*.plt data.\n");
503		return -1;
504	}
505
506	if ((shdr->sh_size % shdr->sh_entsize) != 0) {
507		fprintf(stderr, ".rel*.plt size (%" PRIx64 "d) not a multiple "
508			"of its sh_entsize (%" PRIx64 "d).\n",
509			shdr->sh_size, shdr->sh_entsize);
510		return -1;
511	}
512
513	GElf_Xword relplt_count = shdr->sh_size / shdr->sh_entsize;
514	GElf_Xword i;
515	for (i = 0; i < relplt_count; ++i) {
516		GElf_Rela rela;
517		if (relplt->d_type == ELF_T_REL) {
518			GElf_Rel rel;
519			if (gelf_getrel(relplt, i, &rel) == NULL
520			    || rel_to_rela(lte, &rel, &rela) < 0)
521				return -1;
522
523		} else if (gelf_getrela(relplt, i, &rela) == NULL) {
524			return -1;
525		}
526
527		if (VECT_PUSHBACK(rela_vec, &rela) < 0)
528			return -1;
529	}
530
531	return 0;
532}
533
534int
535elf_load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
536{
537	Elf_Scn *scn;
538	GElf_Shdr shdr;
539	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
540	    || scn == NULL) {
541	fail:
542		fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
543			elf_errmsg(-1));
544		return -1;
545	}
546
547	Elf_Data *data = elf_loaddata(scn, &shdr);
548	if (data == NULL)
549		goto fail;
550
551	size_t j;
552	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
553		GElf_Dyn dyn;
554		if (gelf_getdyn(data, j, &dyn) == NULL)
555			goto fail;
556
557		if(dyn.d_tag == tag) {
558			*valuep = dyn.d_un.d_ptr;
559			return 0;
560		}
561	}
562
563	return -1;
564}
565
566static int
567ltelf_read_elf(struct ltelf *lte, const char *filename)
568{
569	int i;
570	GElf_Addr relplt_addr = 0;
571	GElf_Addr soname_offset = 0;
572	GElf_Xword relplt_size = 0;
573
574	debug(DEBUG_FUNCTION, "ltelf_read_elf(filename=%s)", filename);
575	debug(1, "Reading ELF from %s...", filename);
576
577	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
578		Elf_Scn *scn;
579		GElf_Shdr shdr;
580		const char *name;
581
582		scn = elf_getscn(lte->elf, i);
583		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
584			fprintf(stderr,	"Couldn't get section #%d from"
585				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
586			exit(EXIT_FAILURE);
587		}
588
589		name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
590		if (name == NULL) {
591			fprintf(stderr,	"Couldn't get name of section #%d from"
592				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
593			exit(EXIT_FAILURE);
594		}
595
596		if (shdr.sh_type == SHT_SYMTAB) {
597			read_symbol_table(lte, filename,
598					  scn, &shdr, name, &lte->symtab,
599					  &lte->symtab_count, &lte->strtab);
600
601		} else if (shdr.sh_type == SHT_DYNSYM) {
602			read_symbol_table(lte, filename,
603					  scn, &shdr, name, &lte->dynsym,
604					  &lte->dynsym_count, &lte->dynstr);
605
606		} else if (shdr.sh_type == SHT_DYNAMIC) {
607			Elf_Data *data;
608			size_t j;
609
610			lte->dyn_addr = shdr.sh_addr + lte->bias;
611			lte->dyn_sz = shdr.sh_size;
612
613			data = elf_getdata(scn, NULL);
614			if (data == NULL || elf_getdata(scn, data) != NULL) {
615				fprintf(stderr, "Couldn't get .dynamic data"
616					" from \"%s\": %s\n",
617					filename, strerror(errno));
618				exit(EXIT_FAILURE);
619			}
620
621			for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
622				GElf_Dyn dyn;
623
624				if (gelf_getdyn(data, j, &dyn) == NULL) {
625					fprintf(stderr, "Couldn't get .dynamic"
626						" data from \"%s\": %s\n",
627						filename, strerror(errno));
628					exit(EXIT_FAILURE);
629				}
630				if (dyn.d_tag == DT_JMPREL)
631					relplt_addr = dyn.d_un.d_ptr;
632				else if (dyn.d_tag == DT_PLTRELSZ)
633					relplt_size = dyn.d_un.d_val;
634				else if (dyn.d_tag == DT_SONAME)
635					soname_offset = dyn.d_un.d_val;
636			}
637		} else if (shdr.sh_type == SHT_PROGBITS
638			   || shdr.sh_type == SHT_NOBITS) {
639			if (strcmp(name, ".plt") == 0) {
640				lte->plt_addr = shdr.sh_addr;
641				lte->plt_size = shdr.sh_size;
642				lte->plt_data = elf_loaddata(scn, &shdr);
643				if (lte->plt_data == NULL)
644					fprintf(stderr,
645						"Can't load .plt data\n");
646				lte->plt_flags = shdr.sh_flags;
647			}
648#ifdef ARCH_SUPPORTS_OPD
649			else if (strcmp(name, ".opd") == 0) {
650				lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
651				lte->opd_size = shdr.sh_size;
652				lte->opd = elf_rawdata(scn, NULL);
653			}
654#endif
655		}
656	}
657
658	if (lte->dynsym == NULL || lte->dynstr == NULL) {
659		fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
660			filename);
661		exit(EXIT_FAILURE);
662	}
663
664	if (!relplt_addr || !lte->plt_addr) {
665		debug(1, "%s has no PLT relocations", filename);
666	} else if (relplt_size == 0) {
667		debug(1, "%s has unknown PLT size", filename);
668	} else {
669		for (i = 1; i < lte->ehdr.e_shnum; ++i) {
670			Elf_Scn *scn;
671			GElf_Shdr shdr;
672
673			scn = elf_getscn(lte->elf, i);
674			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
675				fprintf(stderr, "Couldn't get section header"
676					" from \"%s\": %s\n",
677					filename, elf_errmsg(-1));
678				exit(EXIT_FAILURE);
679			}
680			if (shdr.sh_addr == relplt_addr
681			    && shdr.sh_size == relplt_size) {
682				if (elf_read_relocs(lte, scn, &shdr,
683						    &lte->plt_relocs) < 0) {
684					fprintf(stderr, "Couldn't get .rel*.plt"
685						" data from \"%s\": %s\n",
686						filename, elf_errmsg(-1));
687					exit(EXIT_FAILURE);
688				}
689				break;
690			}
691		}
692
693		if (i == lte->ehdr.e_shnum) {
694			fprintf(stderr,
695				"Couldn't find .rel*.plt section in \"%s\"\n",
696				filename);
697			exit(EXIT_FAILURE);
698		}
699	}
700	debug(1, "%s %zd PLT relocations", filename,
701	      vect_size(&lte->plt_relocs));
702
703	if (soname_offset != 0)
704		lte->soname = lte->dynstr + soname_offset;
705
706	return 0;
707}
708
709#ifndef ARCH_HAVE_GET_SYMINFO
710int
711arch_get_sym_info(struct ltelf *lte, const char *filename,
712		  size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
713{
714	return gelf_getsym(lte->dynsym,
715			   ELF64_R_SYM(rela->r_info), sym) != NULL ? 0 : -1;
716}
717#endif
718
719int
720default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
721			  const char *a_name, GElf_Rela *rela, size_t ndx,
722			  struct library_symbol **ret)
723{
724	char *name = strdup(a_name);
725	if (name == NULL) {
726	fail_message:
727		fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
728			strerror(errno));
729	fail:
730		free(name);
731		return -1;
732	}
733
734	GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
735
736	struct library_symbol *libsym = malloc(sizeof(*libsym));
737	if (libsym == NULL)
738		goto fail_message;
739
740	/* XXX The double cast should be removed when
741	 * arch_addr_t becomes integral type.  */
742	arch_addr_t taddr = (arch_addr_t)
743		(uintptr_t)(addr + lte->bias);
744
745	if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
746		free(libsym);
747		goto fail;
748	}
749
750	libsym->next = *ret;
751	*ret = libsym;
752	return 0;
753}
754
755int
756elf_add_plt_entry(struct process *proc, struct ltelf *lte,
757		  const char *name, GElf_Rela *rela, size_t idx,
758		  struct library_symbol **ret)
759{
760	enum plt_status plts
761		= arch_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
762
763	if (plts == PLT_DEFAULT)
764		plts = os_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
765
766	switch (plts) {
767	case PLT_DEFAULT:
768		return default_elf_add_plt_entry(proc, lte, name,
769						 rela, idx, ret);
770	case PLT_FAIL:
771		return -1;
772	case PLT_OK:
773		return 0;
774	}
775
776	assert(! "Invalid return from X_elf_add_plt_entry!");
777	abort();
778}
779
780static void
781mark_chain_latent(struct library_symbol *libsym)
782{
783	for (; libsym != NULL; libsym = libsym->next) {
784		debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
785		libsym->latent = 1;
786	}
787}
788
789static void
790filter_symbol_chain(struct filter *filter,
791		    struct library_symbol **libsymp, struct library *lib)
792{
793	assert(libsymp != NULL);
794	struct library_symbol **ptr = libsymp;
795	while (*ptr != NULL) {
796		if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
797			ptr = &(*ptr)->next;
798		} else {
799			struct library_symbol *sym = *ptr;
800			*ptr = (*ptr)->next;
801			library_symbol_destroy(sym);
802			free(sym);
803		}
804	}
805}
806
807static int
808populate_plt(struct process *proc, const char *filename,
809	     struct ltelf *lte, struct library *lib)
810{
811	const bool latent_plts = options.export_filter != NULL;
812	const size_t count = vect_size(&lte->plt_relocs);
813
814	size_t i;
815	for (i = 0; i < count; ++i) {
816		GElf_Rela *rela = VECT_ELEMENT(&lte->plt_relocs, GElf_Rela, i);
817		GElf_Sym sym;
818
819		switch (arch_get_sym_info(lte, filename, i, rela, &sym)) {
820		default:
821			fprintf(stderr,
822				"Couldn't get relocation for symbol #%zd"
823				" from \"%s\": %s\n",
824				i, filename, elf_errmsg(-1));
825			/* Fall through.  */
826		case 1:
827			continue; /* Skip this entry.  */
828		case 0:
829			break;
830		}
831
832		char const *name = lte->dynstr + sym.st_name;
833		int matched = filter_matches_symbol(options.plt_filter,
834						    name, lib);
835
836		struct library_symbol *libsym = NULL;
837		if (elf_add_plt_entry(proc, lte, name, rela, i, &libsym) < 0)
838			return -1;
839
840		/* If we didn't match the PLT entry, filter the chain
841		 * to only include the matching symbols (but include
842		 * all if we are adding latent symbols) to allow
843		 * backends to override the PLT symbol's name.  */
844
845		if (! matched && ! latent_plts)
846			filter_symbol_chain(options.plt_filter, &libsym, lib);
847
848		if (libsym != NULL) {
849			/* If we are adding those symbols just for
850			 * tracing exports, mark them all latent.  */
851			if (! matched && latent_plts)
852				mark_chain_latent(libsym);
853			library_add_symbol(lib, libsym);
854		}
855	}
856	return 0;
857}
858
859static void
860delete_symbol_chain(struct library_symbol *libsym)
861{
862	while (libsym != NULL) {
863		struct library_symbol *tmp = libsym->next;
864		library_symbol_destroy(libsym);
865		free(libsym);
866		libsym = tmp;
867	}
868}
869
870/* When -x rules result in request to trace several aliases, we only
871 * want to add such symbol once.  The only way that those symbols
872 * differ in is their name, e.g. in glibc you have __GI___libc_free,
873 * __cfree, __free, __libc_free, cfree and free all defined on the
874 * same address.  So instead we keep this unique symbol struct for
875 * each address, and replace name in libsym with a shorter variant if
876 * we find it.  */
877struct unique_symbol {
878	arch_addr_t addr;
879	struct library_symbol *libsym;
880};
881
882static int
883unique_symbol_cmp(const void *key, const void *val)
884{
885	const struct unique_symbol *sym_key = key;
886	const struct unique_symbol *sym_val = val;
887	return sym_key->addr != sym_val->addr;
888}
889
890static enum callback_status
891symbol_with_address(struct library_symbol *sym, void *addrptr)
892{
893	return sym->enter_addr == *(arch_addr_t *)addrptr
894		? CBS_STOP : CBS_CONT;
895}
896
897static int
898populate_this_symtab(struct process *proc, const char *filename,
899		     struct ltelf *lte, struct library *lib,
900		     Elf_Data *symtab, const char *strtab, size_t count,
901		     struct library_exported_name **names)
902{
903	/* If a valid NAMES is passed, we pass in *NAMES a list of
904	 * symbol names that this library exports.  */
905	if (names != NULL)
906		*names = NULL;
907
908	/* Using sorted array would be arguably better, but this
909	 * should be well enough for the number of symbols that we
910	 * typically deal with.  */
911	size_t num_symbols = 0;
912	struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
913	if (symbols == NULL) {
914		fprintf(stderr, "couldn't insert symbols for -x: %s\n",
915			strerror(errno));
916		return -1;
917	}
918
919	GElf_Word secflags[lte->ehdr.e_shnum];
920	size_t i;
921	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
922		Elf_Scn *scn = elf_getscn(lte->elf, i);
923		GElf_Shdr shdr;
924		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
925			secflags[i] = 0;
926		else
927			secflags[i] = shdr.sh_flags;
928	}
929
930	for (i = 0; i < count; ++i) {
931		GElf_Sym sym;
932		if (gelf_getsym(symtab, i, &sym) == NULL) {
933			fprintf(stderr,
934				"couldn't get symbol #%zd from %s: %s\n",
935				i, filename, elf_errmsg(-1));
936			continue;
937		}
938
939		if (sym.st_value == 0 || sym.st_shndx == STN_UNDEF
940		    /* Also ignore any special values besides direct
941		     * section references.  */
942		    || sym.st_shndx >= lte->ehdr.e_shnum)
943			continue;
944
945		/* Find symbol name and snip version.  */
946		const char *orig_name = strtab + sym.st_name;
947		const char *version = strchr(orig_name, '@');
948		size_t len = version != NULL ? (assert(version > orig_name),
949						(size_t)(version - orig_name))
950			: strlen(orig_name);
951		char name[len + 1];
952		memcpy(name, orig_name, len);
953		name[len] = 0;
954
955		/* If we are interested in exports, store this name.  */
956		if (names != NULL) {
957			struct library_exported_name *export
958				= malloc(sizeof *export);
959			char *name_copy = strdup(name);
960
961			if (name_copy == NULL || export == NULL) {
962				free(name_copy);
963				free(export);
964				fprintf(stderr, "Couldn't store symbol %s.  "
965					"Tracing may be incomplete.\n", name);
966			} else {
967				export->name = name_copy;
968				export->own_name = 1;
969				export->next = *names;
970				*names = export;
971			}
972		}
973
974		/* If the symbol is not matched, skip it.  We already
975		 * stored it to export list above.  */
976		if (!filter_matches_symbol(options.static_filter, name, lib))
977			continue;
978
979		arch_addr_t addr = (arch_addr_t)
980			(uintptr_t)(sym.st_value + lte->bias);
981		arch_addr_t naddr;
982
983		/* On arches that support OPD, the value of typical
984		 * function symbol will be a pointer to .opd, but some
985		 * will point directly to .text.  We don't want to
986		 * translate those.  */
987		if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
988			naddr = addr;
989		} else if (arch_translate_address(lte, addr, &naddr) < 0) {
990			fprintf(stderr,
991				"couldn't translate address of %s@%s: %s\n",
992				name, lib->soname, strerror(errno));
993			continue;
994		}
995
996		char *full_name = strdup(name);
997		if (full_name == NULL) {
998			fprintf(stderr, "couldn't copy name of %s@%s: %s\n",
999				name, lib->soname, strerror(errno));
1000			continue;
1001		}
1002
1003		struct library_symbol *libsym = NULL;
1004		enum plt_status plts
1005			= arch_elf_add_func_entry(proc, lte, &sym,
1006						  naddr, full_name, &libsym);
1007		if (plts == PLT_DEFAULT)
1008			plts = os_elf_add_func_entry(proc, lte, &sym,
1009						     naddr, full_name, &libsym);
1010
1011		switch (plts) {
1012		case PLT_DEFAULT:;
1013			/* Put the default symbol to the chain.  */
1014			struct library_symbol *tmp = malloc(sizeof *tmp);
1015			if (tmp == NULL
1016			    || library_symbol_init(tmp, naddr, full_name, 1,
1017						   LS_TOPLT_NONE) < 0) {
1018				free(tmp);
1019
1020				/* Either add the whole bunch, or none
1021				 * of it.  Note that for PLT_FAIL we
1022				 * don't do this--it's the callee's
1023				 * job to clean up after itself before
1024				 * it bails out.  */
1025				delete_symbol_chain(libsym);
1026				libsym = NULL;
1027
1028		case PLT_FAIL:
1029				fprintf(stderr, "Couldn't add symbol %s@%s "
1030					"for tracing.\n", name, lib->soname);
1031
1032				break;
1033			}
1034
1035			full_name = NULL;
1036			tmp->next = libsym;
1037			libsym = tmp;
1038			break;
1039
1040		case PLT_OK:
1041			break;
1042		}
1043
1044		free(full_name);
1045
1046		struct library_symbol *tmp;
1047		for (tmp = libsym; tmp != NULL; ) {
1048			/* Look whether we already have a symbol for
1049			 * this address.  If not, add this one.  If
1050			 * yes, look if we should pick the new symbol
1051			 * name.  */
1052
1053			struct unique_symbol key = { tmp->enter_addr, NULL };
1054			struct unique_symbol *unique
1055				= lsearch(&key, symbols, &num_symbols,
1056					  sizeof *symbols, &unique_symbol_cmp);
1057
1058			if (unique->libsym == NULL) {
1059				unique->libsym = tmp;
1060				unique->addr = tmp->enter_addr;
1061				tmp = tmp->next;
1062				unique->libsym->next = NULL;
1063			} else {
1064				if (strlen(tmp->name)
1065				    < strlen(unique->libsym->name)) {
1066					library_symbol_set_name
1067						(unique->libsym, tmp->name, 1);
1068					tmp->name = NULL;
1069				}
1070				struct library_symbol *next = tmp->next;
1071				library_symbol_destroy(tmp);
1072				free(tmp);
1073				tmp = next;
1074			}
1075		}
1076	}
1077
1078	/* Now we do the union of this set of unique symbols with
1079	 * what's already in the library.  */
1080	for (i = 0; i < num_symbols; ++i) {
1081		struct library_symbol *this_sym = symbols[i].libsym;
1082		assert(this_sym != NULL);
1083		struct library_symbol *other
1084			= library_each_symbol(lib, NULL, symbol_with_address,
1085					      &this_sym->enter_addr);
1086		if (other != NULL) {
1087			library_symbol_destroy(this_sym);
1088			free(this_sym);
1089			symbols[i].libsym = NULL;
1090		}
1091	}
1092
1093	for (i = 0; i < num_symbols; ++i)
1094		if (symbols[i].libsym != NULL)
1095			library_add_symbol(lib, symbols[i].libsym);
1096
1097	free(symbols);
1098	return 0;
1099}
1100
1101static int
1102populate_symtab(struct process *proc, const char *filename,
1103		struct ltelf *lte, struct library *lib,
1104		int symtabs, int exports)
1105{
1106	int status;
1107	if (symtabs && lte->symtab != NULL && lte->strtab != NULL
1108	    && (status = populate_this_symtab(proc, filename, lte, lib,
1109					      lte->symtab, lte->strtab,
1110					      lte->symtab_count, NULL)) < 0)
1111		return status;
1112
1113	/* Check whether we want to trace symbols implemented by this
1114	 * library (-l).  */
1115	struct library_exported_name **names = NULL;
1116	if (exports) {
1117		debug(DEBUG_FUNCTION, "-l matches %s", lib->soname);
1118		names = &lib->exported_names;
1119	}
1120
1121	return populate_this_symtab(proc, filename, lte, lib,
1122				    lte->dynsym, lte->dynstr,
1123				    lte->dynsym_count, names);
1124}
1125
1126static int
1127read_module(struct library *lib, struct process *proc,
1128	    const char *filename, GElf_Addr bias, int main)
1129{
1130	struct ltelf lte;
1131	if (ltelf_init(&lte, filename) < 0)
1132		return -1;
1133
1134	/* XXX When we abstract ABI into a module, this should instead
1135	 * become something like
1136	 *
1137	 *    proc->abi = arch_get_abi(lte.ehdr);
1138	 *
1139	 * The code in ltelf_init needs to be replaced by this logic.
1140	 * Be warned that libltrace.c calls ltelf_init as well to
1141	 * determine whether ABI is supported.  This is to get
1142	 * reasonable error messages when trying to run 64-bit binary
1143	 * with 32-bit ltrace.  It is desirable to preserve this.  */
1144	proc->e_machine = lte.ehdr.e_machine;
1145	proc->e_class = lte.ehdr.e_ident[EI_CLASS];
1146	get_arch_dep(proc);
1147
1148	/* Find out the base address.  For PIE main binaries we look
1149	 * into auxv, otherwise we scan phdrs.  */
1150	if (main && lte.ehdr.e_type == ET_DYN) {
1151		arch_addr_t entry;
1152		if (process_get_entry(proc, &entry, NULL) < 0) {
1153			fprintf(stderr, "Couldn't find entry of PIE %s\n",
1154				filename);
1155		fail:
1156			ltelf_destroy(&lte);
1157			return -1;
1158		}
1159		/* XXX The double cast should be removed when
1160		 * arch_addr_t becomes integral type.  */
1161		lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
1162		lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1163
1164	} else {
1165		GElf_Phdr phdr;
1166		size_t i;
1167		for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1168			if (phdr.p_type == PT_LOAD) {
1169				lte.base_addr = phdr.p_vaddr + bias;
1170				break;
1171			}
1172		}
1173
1174		lte.bias = bias;
1175		lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1176
1177		if (lte.base_addr == 0) {
1178			fprintf(stderr,
1179				"Couldn't determine base address of %s\n",
1180				filename);
1181			goto fail;
1182		}
1183	}
1184
1185	if (ltelf_read_elf(&lte, filename) < 0)
1186		goto fail;
1187
1188	if (arch_elf_init(&lte, lib) < 0) {
1189		fprintf(stderr, "Backend initialization failed.\n");
1190		goto fail;
1191	}
1192
1193	if (lib == NULL)
1194		goto fail;
1195
1196	/* Note that we set soname and pathname as soon as they are
1197	 * allocated, so in case of further errors, this get released
1198	 * when LIB is released, which should happen in the caller
1199	 * when we return error.  */
1200
1201	if (lib->pathname == NULL) {
1202		char *pathname = strdup(filename);
1203		if (pathname == NULL)
1204			goto fail;
1205		library_set_pathname(lib, pathname, 1);
1206	}
1207
1208	if (lte.soname != NULL) {
1209		char *soname = strdup(lte.soname);
1210		if (soname == NULL)
1211			goto fail;
1212		library_set_soname(lib, soname, 1);
1213	} else {
1214		const char *soname = rindex(lib->pathname, '/');
1215		if (soname != NULL)
1216			soname += 1;
1217		else
1218			soname = lib->pathname;
1219		library_set_soname(lib, soname, 0);
1220	}
1221
1222	/* XXX The double cast should be removed when
1223	 * arch_addr_t becomes integral type.  */
1224	arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1225	if (arch_translate_address(&lte, entry, &entry) < 0)
1226		goto fail;
1227
1228	/* XXX The double cast should be removed when
1229	 * arch_addr_t becomes integral type.  */
1230	lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1231	lib->entry = entry;
1232	/* XXX The double cast should be removed when
1233	 * arch_addr_t becomes integral type.  */
1234	lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1235
1236	/* There are two reasons that we need to inspect symbol tables
1237	 * or populate PLT entries.  Either the user requested
1238	 * corresponding tracing features (respectively -x and -e), or
1239	 * they requested tracing exported symbols (-l).
1240	 *
1241	 * In the latter case we need to keep even those PLT slots
1242	 * that are not requested by -e (but we keep them latent).  We
1243	 * also need to inspect .dynsym to find what exports this
1244	 * library provide, to turn on existing latent PLT
1245	 * entries.  */
1246
1247	int plts = filter_matches_library(options.plt_filter, lib);
1248	if ((plts || options.export_filter != NULL)
1249	    && populate_plt(proc, filename, &lte, lib) < 0)
1250		goto fail;
1251
1252	int exports = filter_matches_library(options.export_filter, lib);
1253	int symtabs = filter_matches_library(options.static_filter, lib);
1254	if ((symtabs || exports)
1255	    && populate_symtab(proc, filename, &lte, lib,
1256			       symtabs, exports) < 0)
1257		goto fail;
1258
1259	arch_elf_destroy(&lte);
1260	ltelf_destroy(&lte);
1261	return 0;
1262}
1263
1264int
1265ltelf_read_library(struct library *lib, struct process *proc,
1266		   const char *filename, GElf_Addr bias)
1267{
1268	return read_module(lib, proc, filename, bias, 0);
1269}
1270
1271
1272struct library *
1273ltelf_read_main_binary(struct process *proc, const char *path)
1274{
1275	struct library *lib = malloc(sizeof(*lib));
1276	if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1277		free(lib);
1278		return NULL;
1279	}
1280	library_set_pathname(lib, path, 0);
1281
1282	/* There is a race between running the process and reading its
1283	 * binary for internal consumption.  So open the binary from
1284	 * the /proc filesystem.  XXX Note that there is similar race
1285	 * for libraries, but there we don't have a nice answer like
1286	 * that.  Presumably we could read the DSOs from the process
1287	 * memory image, but that's not currently done.  */
1288	char *fname = pid2name(proc->pid);
1289	if (fname == NULL
1290	    || read_module(lib, proc, fname, 0, 1) < 0) {
1291		library_destroy(lib);
1292		free(lib);
1293		lib = NULL;
1294	}
1295
1296	free(fname);
1297	return lib;
1298}
1299