1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2010 Zachary T Welch, CodeSourcery
5 * Copyright (C) 2010 Joe Damato
6 * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
7 * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
8 * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
9 * Copyright (C) 2006 Paul Gilliam, IBM Corporation
10 * Copyright (C) 2006 Ian Wienand
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 * 02110-1301 USA
26 */
27
28#include "config.h"
29
30#include <assert.h>
31#ifdef	__linux__
32#include <endian.h>
33#endif
34#include <errno.h>
35#include <fcntl.h>
36#include <gelf.h>
37#include <inttypes.h>
38#include <search.h>
39#include <stdbool.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <strings.h>
45#include <unistd.h>
46
47#include "backend.h"
48#include "filter.h"
49#include "library.h"
50#include "ltrace-elf.h"
51#include "proc.h"
52#include "debug.h"
53#include "options.h"
54
55#ifndef ARCH_HAVE_LTELF_DATA
/* Fallback used when the architecture does not define
 * ARCH_HAVE_LTELF_DATA: there is no per-arch ELF state to set up, so
 * this always reports success (0).  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
	(void)lte;
	(void)lib;
	return 0;
}
61
/* Fallback used when the architecture does not define
 * ARCH_HAVE_LTELF_DATA: nothing was set up, so nothing is released.  */
void
arch_elf_destroy(struct ltelf *lte)
{
	(void)lte;
}
66#endif
67
68#ifndef OS_HAVE_ADD_PLT_ENTRY
69enum plt_status
70os_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
71		     const char *a_name, GElf_Rela *rela, size_t ndx,
72		     struct library_symbol **ret)
73{
74	return PLT_DEFAULT;
75}
76#endif
77
78#ifndef ARCH_HAVE_ADD_PLT_ENTRY
79enum plt_status
80arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
81		       const char *a_name, GElf_Rela *rela, size_t ndx,
82		       struct library_symbol **ret)
83{
84	return PLT_DEFAULT;
85}
86#endif
87
88#ifndef OS_HAVE_ADD_FUNC_ENTRY
89enum plt_status
90os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
91		      const GElf_Sym *sym,
92		      arch_addr_t addr, const char *name,
93		      struct library_symbol **ret)
94{
95	if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) {
96		*ret = NULL;
97		return PLT_OK;
98	} else {
99		return PLT_DEFAULT;
100	}
101}
102#endif
103
104#ifndef ARCH_HAVE_ADD_FUNC_ENTRY
105enum plt_status
106arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
107			const GElf_Sym *sym,
108			arch_addr_t addr, const char *name,
109			struct library_symbol **ret)
110{
111	return PLT_DEFAULT;
112}
113#endif
114
115Elf_Data *
116elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
117{
118	Elf_Data *data = elf_getdata(scn, NULL);
119	if (data == NULL || elf_getdata(scn, data) != NULL
120	    || data->d_off || data->d_size != shdr->sh_size)
121		return NULL;
122	return data;
123}
124
125static int
126elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
127		   int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
128		   void *data)
129{
130	int i;
131	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
132		Elf_Scn *scn;
133		GElf_Shdr shdr;
134
135		scn = elf_getscn(lte->elf, i);
136		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
137			debug(1, "Couldn't read section or header.");
138			return -1;
139		}
140		if (predicate(scn, &shdr, data)) {
141			*tgt_sec = scn;
142			*tgt_shdr = shdr;
143			return 0;
144		}
145	}
146
147	*tgt_sec = NULL;
148	return 0;
149}
150
151static int
152inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
153{
154	GElf_Addr addr = *(GElf_Addr *)data;
155	return addr >= shdr->sh_addr
156		&& addr < shdr->sh_addr + shdr->sh_size;
157}
158
159int
160elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
161			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
162{
163	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
164				  &inside_p, &addr);
165}
166
167static int
168type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
169{
170	GElf_Word type = *(GElf_Word *)data;
171	return shdr->sh_type == type;
172}
173
174int
175elf_get_section_type(struct ltelf *lte, GElf_Word type,
176		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
177{
178	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
179				  &type_p, &type);
180}
181
182struct section_named_data {
183	struct ltelf *lte;
184	const char *name;
185};
186
187static int
188name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
189{
190	struct section_named_data *data = d;
191	const char *name = elf_strptr(data->lte->elf,
192				      data->lte->ehdr.e_shstrndx,
193				      shdr->sh_name);
194	return strcmp(name, data->name) == 0;
195}
196
197int
198elf_get_section_named(struct ltelf *lte, const char *name,
199		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
200{
201	struct section_named_data data = {
202		.lte = lte,
203		.name = name,
204	};
205	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
206				  &name_p, &data);
207}
208
209static struct elf_each_symbol_t
210each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
211	       unsigned i,
212	       enum callback_status (*cb)(GElf_Sym *symbol,
213					  const char *name, void *data),
214	       void *data)
215{
216	for (; i < count; ++i) {
217		GElf_Sym sym;
218		if (gelf_getsym(symtab, i, &sym) == NULL)
219			return (struct elf_each_symbol_t){ i, -2 };
220
221		switch (cb(&sym, strtab + sym.st_name, data)) {
222		case CBS_FAIL:
223			return (struct elf_each_symbol_t){ i, -1 };
224		case CBS_STOP:
225			return (struct elf_each_symbol_t){ i + 1, 0 };
226		case CBS_CONT:
227			break;
228		}
229	}
230
231	return (struct elf_each_symbol_t){ 0, 0 };
232}
233
234/* N.B.: gelf_getsym takes integer argument.  Since negative values
235 * are invalid as indices, we can use the extra bit to encode which
236 * symbol table we are looking into.  ltrace currently doesn't handle
237 * more than two symbol tables anyway, nor does it handle the xindex
238 * stuff.  */
239struct elf_each_symbol_t
240elf_each_symbol(struct ltelf *lte, unsigned start_after,
241		enum callback_status (*cb)(GElf_Sym *symbol,
242					   const char *name, void *data),
243		void *data)
244{
245	unsigned index = start_after == 0 ? 0 : start_after >> 1;
246
247	/* Go through static symbol table first.  */
248	if ((start_after & 0x1) == 0) {
249		struct elf_each_symbol_t st
250			= each_symbol_in(lte->symtab, lte->strtab,
251					 lte->symtab_count, index, cb, data);
252
253		/* If the iteration stopped prematurely, bail out.  */
254		if (st.restart != 0)
255			return ((struct elf_each_symbol_t)
256				{ st.restart << 1, st.status });
257	}
258
259	struct elf_each_symbol_t st
260		= each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
261				 index, cb, data);
262	if (st.restart != 0)
263		return ((struct elf_each_symbol_t)
264			{ st.restart << 1 | 0x1, st.status });
265
266	return (struct elf_each_symbol_t){ 0, 0 };
267}
268
269int
270elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
271{
272	assert(data != NULL);
273	if (data->d_size < size || offset > data->d_size - size) {
274		debug(1, "Not enough data to read %"PRId64"-byte value"
275		      " at offset %"PRId64".", size, offset);
276		return 0;
277	}
278	return 1;
279}
280
281#define DEF_READER(NAME, SIZE)						\
282	int								\
283	NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)	\
284	{								\
285		if (!elf_can_read_next(data, offset, SIZE / 8))		\
286			return -1;					\
287									\
288		if (data->d_buf == NULL) /* NODATA section */ {		\
289			*retp = 0;					\
290			return 0;					\
291		}							\
292									\
293		union {							\
294			uint##SIZE##_t dst;				\
295			char buf[0];					\
296		} u;							\
297		memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));	\
298		*retp = u.dst;						\
299		return 0;						\
300	}
301
302DEF_READER(elf_read_u8, 8)
303DEF_READER(elf_read_u16, 16)
304DEF_READER(elf_read_u32, 32)
305DEF_READER(elf_read_u64, 64)
306
307#undef DEF_READER
308
309#define DEF_READER(NAME, SIZE)						\
310	int								\
311	NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)	\
312	{								\
313		int rc = elf_read_u##SIZE(data, *offset, retp);		\
314		if (rc < 0)						\
315			return rc;					\
316		*offset += SIZE / 8;					\
317		return 0;						\
318	}
319
320DEF_READER(elf_read_next_u8, 8)
321DEF_READER(elf_read_next_u16, 16)
322DEF_READER(elf_read_next_u32, 32)
323DEF_READER(elf_read_next_u64, 64)
324
325#undef DEF_READER
326
327int
328elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
329{
330	uint64_t result = 0;
331	int shift = 0;
332	int size = 8 * sizeof result;
333
334	while (1) {
335		uint8_t byte;
336		if (elf_read_next_u8(data, offset, &byte) < 0)
337			return -1;
338
339		uint8_t payload = byte & 0x7f;
340		result |= (uint64_t)payload << shift;
341		shift += 7;
342		if (shift > size && byte != 0x1)
343			return -1;
344		if ((byte & 0x80) == 0)
345			break;
346	}
347
348	if (retp != NULL)
349		*retp = result;
350	return 0;
351}
352
353int
354elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
355{
356	return elf_read_next_uleb128(data, &offset, retp);
357}
358
359int
360ltelf_init(struct ltelf *lte, const char *filename)
361{
362	memset(lte, 0, sizeof *lte);
363	lte->fd = open(filename, O_RDONLY);
364	if (lte->fd == -1) {
365		fprintf(stderr, "Can't open %s: %s\n", filename,
366			strerror(errno));
367		return 1;
368	}
369
370	elf_version(EV_CURRENT);
371
372#ifdef HAVE_ELF_C_READ_MMAP
373	lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
374#else
375	lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
376#endif
377
378	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
379		fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
380		exit(EXIT_FAILURE);
381	}
382
383	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
384		fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
385			filename, elf_errmsg(-1));
386		exit(EXIT_FAILURE);
387	}
388
389	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
390		fprintf(stderr, "\"%s\" is neither an ELF executable"
391			" nor a shared library\n", filename);
392		exit(EXIT_FAILURE);
393	}
394
395	if (1
396#ifdef LT_ELF_MACHINE
397	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
398		|| lte->ehdr.e_machine != LT_ELF_MACHINE)
399#endif
400#ifdef LT_ELF_MACHINE2
401	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
402		|| lte->ehdr.e_machine != LT_ELF_MACHINE2)
403#endif
404#ifdef LT_ELF_MACHINE3
405	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
406		|| lte->ehdr.e_machine != LT_ELF_MACHINE3)
407#endif
408		) {
409		fprintf(stderr,
410			"\"%s\" is ELF from incompatible architecture\n",
411			filename);
412		exit(EXIT_FAILURE);
413	}
414
415	VECT_INIT(&lte->plt_relocs, GElf_Rela);
416
417	return 0;
418}
419
420void
421ltelf_destroy(struct ltelf *lte)
422{
423	debug(DEBUG_FUNCTION, "close_elf()");
424	elf_end(lte->elf);
425	close(lte->fd);
426	VECT_DESTROY(&lte->plt_relocs, GElf_Rela, NULL, NULL);
427}
428
429static void
430read_symbol_table(struct ltelf *lte, const char *filename,
431		  Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
432		  Elf_Data **datap, size_t *countp, const char **strsp)
433{
434	*datap = elf_getdata(scn, NULL);
435	*countp = shdr->sh_size / shdr->sh_entsize;
436	if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
437	    && options.static_filter != NULL) {
438		fprintf(stderr, "Couldn't get data of section"
439			" %s from \"%s\": %s\n",
440			name, filename, elf_errmsg(-1));
441		exit(EXIT_FAILURE);
442	}
443
444	scn = elf_getscn(lte->elf, shdr->sh_link);
445	GElf_Shdr shdr2;
446	if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
447		fprintf(stderr, "Couldn't get header of section"
448			" #%d from \"%s\": %s\n",
449			shdr->sh_link, filename, elf_errmsg(-1));
450		exit(EXIT_FAILURE);
451	}
452
453	Elf_Data *data = elf_getdata(scn, NULL);
454	if (data == NULL || elf_getdata(scn, data) != NULL
455	    || shdr2.sh_size != data->d_size || data->d_off) {
456		fprintf(stderr, "Couldn't get data of section"
457			" #%d from \"%s\": %s\n",
458			shdr2.sh_link, filename, elf_errmsg(-1));
459		exit(EXIT_FAILURE);
460	}
461
462	*strsp = data->d_buf;
463}
464
465static int
466rel_to_rela(struct ltelf *lte, const GElf_Rel *rel, GElf_Rela *rela)
467{
468	rela->r_offset = rel->r_offset;
469	rela->r_info = rel->r_info;
470
471	Elf_Scn *sec;
472	GElf_Shdr shdr;
473	if (elf_get_section_covering(lte, rel->r_offset, &sec, &shdr) < 0
474	    || sec == NULL)
475		return -1;
476
477	Elf_Data *data = elf_loaddata(sec, &shdr);
478	if (data == NULL)
479		return -1;
480
481	GElf_Xword offset = rel->r_offset - shdr.sh_addr - data->d_off;
482	uint64_t value;
483	if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
484		uint32_t tmp;
485		if (elf_read_u32(data, offset, &tmp) < 0)
486			return -1;
487		value = tmp;
488	} else if (elf_read_u64(data, offset, &value) < 0) {
489		return -1;
490	}
491
492	rela->r_addend = value;
493	return 0;
494}
495
496int
497elf_read_relocs(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr,
498		struct vect *rela_vec)
499{
500	if (vect_reserve_additional(rela_vec, lte->ehdr.e_shnum) < 0)
501		return -1;
502
503	Elf_Data *relplt = elf_loaddata(scn, shdr);
504	if (relplt == NULL) {
505		fprintf(stderr, "Couldn't load .rel*.plt data.\n");
506		return -1;
507	}
508
509	if ((shdr->sh_size % shdr->sh_entsize) != 0) {
510		fprintf(stderr, ".rel*.plt size (%" PRIx64 "d) not a multiple "
511			"of its sh_entsize (%" PRIx64 "d).\n",
512			shdr->sh_size, shdr->sh_entsize);
513		return -1;
514	}
515
516	GElf_Xword relplt_count = shdr->sh_size / shdr->sh_entsize;
517	GElf_Xword i;
518	for (i = 0; i < relplt_count; ++i) {
519		GElf_Rela rela;
520		if (relplt->d_type == ELF_T_REL) {
521			GElf_Rel rel;
522			if (gelf_getrel(relplt, i, &rel) == NULL
523			    || rel_to_rela(lte, &rel, &rela) < 0)
524				return -1;
525
526		} else if (gelf_getrela(relplt, i, &rela) == NULL) {
527			return -1;
528		}
529
530		if (VECT_PUSHBACK(rela_vec, &rela) < 0)
531			return -1;
532	}
533
534	return 0;
535}
536
537int
538elf_load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
539{
540	Elf_Scn *scn;
541	GElf_Shdr shdr;
542	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
543	    || scn == NULL) {
544	fail:
545		fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
546			elf_errmsg(-1));
547		return -1;
548	}
549
550	Elf_Data *data = elf_loaddata(scn, &shdr);
551	if (data == NULL)
552		goto fail;
553
554	size_t j;
555	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
556		GElf_Dyn dyn;
557		if (gelf_getdyn(data, j, &dyn) == NULL)
558			goto fail;
559
560		if(dyn.d_tag == tag) {
561			*valuep = dyn.d_un.d_ptr;
562			return 0;
563		}
564	}
565
566	return -1;
567}
568
569static int
570ltelf_read_elf(struct ltelf *lte, const char *filename)
571{
572	int i;
573	GElf_Addr relplt_addr = 0;
574	GElf_Addr soname_offset = 0;
575	GElf_Xword relplt_size = 0;
576
577	debug(DEBUG_FUNCTION, "ltelf_read_elf(filename=%s)", filename);
578	debug(1, "Reading ELF from %s...", filename);
579
580	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
581		Elf_Scn *scn;
582		GElf_Shdr shdr;
583		const char *name;
584
585		scn = elf_getscn(lte->elf, i);
586		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
587			fprintf(stderr,	"Couldn't get section #%d from"
588				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
589			exit(EXIT_FAILURE);
590		}
591
592		name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
593		if (name == NULL) {
594			fprintf(stderr,	"Couldn't get name of section #%d from"
595				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
596			exit(EXIT_FAILURE);
597		}
598
599		if (shdr.sh_type == SHT_SYMTAB) {
600			read_symbol_table(lte, filename,
601					  scn, &shdr, name, &lte->symtab,
602					  &lte->symtab_count, &lte->strtab);
603
604		} else if (shdr.sh_type == SHT_DYNSYM) {
605			read_symbol_table(lte, filename,
606					  scn, &shdr, name, &lte->dynsym,
607					  &lte->dynsym_count, &lte->dynstr);
608
609		} else if (shdr.sh_type == SHT_DYNAMIC) {
610			Elf_Data *data;
611			size_t j;
612
613			lte->dyn_addr = shdr.sh_addr + lte->bias;
614			lte->dyn_sz = shdr.sh_size;
615
616			data = elf_getdata(scn, NULL);
617			if (data == NULL || elf_getdata(scn, data) != NULL) {
618				fprintf(stderr, "Couldn't get .dynamic data"
619					" from \"%s\": %s\n",
620					filename, strerror(errno));
621				exit(EXIT_FAILURE);
622			}
623
624			for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
625				GElf_Dyn dyn;
626
627				if (gelf_getdyn(data, j, &dyn) == NULL) {
628					fprintf(stderr, "Couldn't get .dynamic"
629						" data from \"%s\": %s\n",
630						filename, strerror(errno));
631					exit(EXIT_FAILURE);
632				}
633				if (dyn.d_tag == DT_JMPREL)
634					relplt_addr = dyn.d_un.d_ptr;
635				else if (dyn.d_tag == DT_PLTRELSZ)
636					relplt_size = dyn.d_un.d_val;
637				else if (dyn.d_tag == DT_SONAME)
638					soname_offset = dyn.d_un.d_val;
639			}
640		} else if (shdr.sh_type == SHT_PROGBITS
641			   || shdr.sh_type == SHT_NOBITS) {
642			if (strcmp(name, ".plt") == 0) {
643				lte->plt_addr = shdr.sh_addr;
644				lte->plt_size = shdr.sh_size;
645				lte->plt_data = elf_loaddata(scn, &shdr);
646				if (lte->plt_data == NULL)
647					fprintf(stderr,
648						"Can't load .plt data\n");
649				lte->plt_flags = shdr.sh_flags;
650			}
651#ifdef ARCH_SUPPORTS_OPD
652			else if (strcmp(name, ".opd") == 0) {
653				lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
654				lte->opd_size = shdr.sh_size;
655				lte->opd = elf_rawdata(scn, NULL);
656			}
657#endif
658		}
659	}
660
661	if (lte->dynsym == NULL || lte->dynstr == NULL) {
662		fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
663			filename);
664		exit(EXIT_FAILURE);
665	}
666
667	if (!relplt_addr || !lte->plt_addr) {
668		debug(1, "%s has no PLT relocations", filename);
669	} else if (relplt_size == 0) {
670		debug(1, "%s has unknown PLT size", filename);
671	} else {
672		for (i = 1; i < lte->ehdr.e_shnum; ++i) {
673			Elf_Scn *scn;
674			GElf_Shdr shdr;
675
676			scn = elf_getscn(lte->elf, i);
677			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
678				fprintf(stderr, "Couldn't get section header"
679					" from \"%s\": %s\n",
680					filename, elf_errmsg(-1));
681				exit(EXIT_FAILURE);
682			}
683			if (shdr.sh_addr == relplt_addr
684			    && shdr.sh_size == relplt_size) {
685				if (elf_read_relocs(lte, scn, &shdr,
686						    &lte->plt_relocs) < 0) {
687					fprintf(stderr, "Couldn't get .rel*.plt"
688						" data from \"%s\": %s\n",
689						filename, elf_errmsg(-1));
690					exit(EXIT_FAILURE);
691				}
692				break;
693			}
694		}
695
696		if (i == lte->ehdr.e_shnum) {
697			fprintf(stderr,
698				"Couldn't find .rel*.plt section in \"%s\"\n",
699				filename);
700			exit(EXIT_FAILURE);
701		}
702	}
703	debug(1, "%s %zd PLT relocations", filename,
704	      vect_size(&lte->plt_relocs));
705
706	if (soname_offset != 0)
707		lte->soname = lte->dynstr + soname_offset;
708
709	return 0;
710}
711
712#ifndef ARCH_HAVE_GET_SYMINFO
713int
714arch_get_sym_info(struct ltelf *lte, const char *filename,
715		  size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
716{
717	return gelf_getsym(lte->dynsym,
718			   ELF64_R_SYM(rela->r_info), sym) != NULL ? 0 : -1;
719}
720#endif
721
722int
723default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
724			  const char *a_name, GElf_Rela *rela, size_t ndx,
725			  struct library_symbol **ret)
726{
727	char *name = strdup(a_name);
728	if (name == NULL) {
729	fail_message:
730		fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
731			strerror(errno));
732	fail:
733		free(name);
734		return -1;
735	}
736
737	GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
738
739	struct library_symbol *libsym = malloc(sizeof(*libsym));
740	if (libsym == NULL)
741		goto fail_message;
742
743	/* XXX The double cast should be removed when
744	 * arch_addr_t becomes integral type.  */
745	arch_addr_t taddr = (arch_addr_t)
746		(uintptr_t)(addr + lte->bias);
747
748	if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
749		free(libsym);
750		goto fail;
751	}
752
753	libsym->next = *ret;
754	*ret = libsym;
755	return 0;
756}
757
758int
759elf_add_plt_entry(struct process *proc, struct ltelf *lte,
760		  const char *name, GElf_Rela *rela, size_t idx,
761		  struct library_symbol **ret)
762{
763	enum plt_status plts
764		= arch_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
765
766	if (plts == PLT_DEFAULT)
767		plts = os_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
768
769	switch (plts) {
770	case PLT_DEFAULT:
771		return default_elf_add_plt_entry(proc, lte, name,
772						 rela, idx, ret);
773	case PLT_FAIL:
774		return -1;
775	case PLT_OK:
776		return 0;
777	}
778
779	assert(! "Invalid return from X_elf_add_plt_entry!");
780	abort();
781}
782
783static void
784mark_chain_latent(struct library_symbol *libsym)
785{
786	for (; libsym != NULL; libsym = libsym->next) {
787		debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
788		libsym->latent = 1;
789	}
790}
791
792static void
793filter_symbol_chain(struct filter *filter,
794		    struct library_symbol **libsymp, struct library *lib)
795{
796	assert(libsymp != NULL);
797	struct library_symbol **ptr = libsymp;
798	while (*ptr != NULL) {
799		if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
800			ptr = &(*ptr)->next;
801		} else {
802			struct library_symbol *sym = *ptr;
803			*ptr = (*ptr)->next;
804			library_symbol_destroy(sym);
805			free(sym);
806		}
807	}
808}
809
810static int
811populate_plt(struct process *proc, const char *filename,
812	     struct ltelf *lte, struct library *lib)
813{
814	const bool latent_plts = options.export_filter != NULL;
815	const size_t count = vect_size(&lte->plt_relocs);
816
817	size_t i;
818	for (i = 0; i < count; ++i) {
819		GElf_Rela *rela = VECT_ELEMENT(&lte->plt_relocs, GElf_Rela, i);
820		GElf_Sym sym;
821
822		switch (arch_get_sym_info(lte, filename, i, rela, &sym)) {
823		default:
824			fprintf(stderr,
825				"Couldn't get relocation for symbol #%zd"
826				" from \"%s\": %s\n",
827				i, filename, elf_errmsg(-1));
828			/* Fall through.  */
829		case 1:
830			continue; /* Skip this entry.  */
831		case 0:
832			break;
833		}
834
835		char const *name = lte->dynstr + sym.st_name;
836		int matched = filter_matches_symbol(options.plt_filter,
837						    name, lib);
838
839		struct library_symbol *libsym = NULL;
840		if (elf_add_plt_entry(proc, lte, name, rela, i, &libsym) < 0)
841			return -1;
842
843		/* If we didn't match the PLT entry, filter the chain
844		 * to only include the matching symbols (but include
845		 * all if we are adding latent symbols) to allow
846		 * backends to override the PLT symbol's name.  */
847
848		if (! matched && ! latent_plts)
849			filter_symbol_chain(options.plt_filter, &libsym, lib);
850
851		if (libsym != NULL) {
852			/* If we are adding those symbols just for
853			 * tracing exports, mark them all latent.  */
854			if (! matched && latent_plts)
855				mark_chain_latent(libsym);
856			library_add_symbol(lib, libsym);
857		}
858	}
859	return 0;
860}
861
862static void
863delete_symbol_chain(struct library_symbol *libsym)
864{
865	while (libsym != NULL) {
866		struct library_symbol *tmp = libsym->next;
867		library_symbol_destroy(libsym);
868		free(libsym);
869		libsym = tmp;
870	}
871}
872
873/* When -x rules result in request to trace several aliases, we only
874 * want to add such symbol once.  The only way that those symbols
875 * differ in is their name, e.g. in glibc you have __GI___libc_free,
876 * __cfree, __free, __libc_free, cfree and free all defined on the
877 * same address.  So instead we keep this unique symbol struct for
878 * each address, and replace name in libsym with a shorter variant if
879 * we find it.  */
880struct unique_symbol {
881	arch_addr_t addr;
882	struct library_symbol *libsym;
883};
884
885static int
886unique_symbol_cmp(const void *key, const void *val)
887{
888	const struct unique_symbol *sym_key = key;
889	const struct unique_symbol *sym_val = val;
890	return sym_key->addr != sym_val->addr;
891}
892
893static enum callback_status
894symbol_with_address(struct library_symbol *sym, void *addrptr)
895{
896	return sym->enter_addr == *(arch_addr_t *)addrptr
897		? CBS_STOP : CBS_CONT;
898}
899
900static int
901populate_this_symtab(struct process *proc, const char *filename,
902		     struct ltelf *lte, struct library *lib,
903		     Elf_Data *symtab, const char *strtab, size_t count,
904		     struct library_exported_name **names)
905{
906	/* If a valid NAMES is passed, we pass in *NAMES a list of
907	 * symbol names that this library exports.  */
908	if (names != NULL)
909		*names = NULL;
910
911	/* Using sorted array would be arguably better, but this
912	 * should be well enough for the number of symbols that we
913	 * typically deal with.  */
914	size_t num_symbols = 0;
915	struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
916	if (symbols == NULL) {
917		fprintf(stderr, "couldn't insert symbols for -x: %s\n",
918			strerror(errno));
919		return -1;
920	}
921
922	GElf_Word secflags[lte->ehdr.e_shnum];
923	size_t i;
924	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
925		Elf_Scn *scn = elf_getscn(lte->elf, i);
926		GElf_Shdr shdr;
927		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
928			secflags[i] = 0;
929		else
930			secflags[i] = shdr.sh_flags;
931	}
932
933	for (i = 0; i < count; ++i) {
934		GElf_Sym sym;
935		if (gelf_getsym(symtab, i, &sym) == NULL) {
936			fprintf(stderr,
937				"couldn't get symbol #%zd from %s: %s\n",
938				i, filename, elf_errmsg(-1));
939			continue;
940		}
941
942		if (sym.st_value == 0 || sym.st_shndx == STN_UNDEF
943		    /* Also ignore any special values besides direct
944		     * section references.  */
945		    || sym.st_shndx >= lte->ehdr.e_shnum)
946			continue;
947
948		/* Find symbol name and snip version.  */
949		const char *orig_name = strtab + sym.st_name;
950		const char *version = strchr(orig_name, '@');
951		size_t len = version != NULL ? (assert(version > orig_name),
952						(size_t)(version - orig_name))
953			: strlen(orig_name);
954		char name[len + 1];
955		memcpy(name, orig_name, len);
956		name[len] = 0;
957
958		/* If we are interested in exports, store this name.  */
959		if (names != NULL) {
960			struct library_exported_name *export
961				= malloc(sizeof *export);
962			char *name_copy = strdup(name);
963
964			if (name_copy == NULL || export == NULL) {
965				free(name_copy);
966				free(export);
967				fprintf(stderr, "Couldn't store symbol %s.  "
968					"Tracing may be incomplete.\n", name);
969			} else {
970				export->name = name_copy;
971				export->own_name = 1;
972				export->next = *names;
973				*names = export;
974			}
975		}
976
977		/* If the symbol is not matched, skip it.  We already
978		 * stored it to export list above.  */
979		if (!filter_matches_symbol(options.static_filter, name, lib))
980			continue;
981
982		arch_addr_t addr = (arch_addr_t)
983			(uintptr_t)(sym.st_value + lte->bias);
984		arch_addr_t naddr;
985
986		/* On arches that support OPD, the value of typical
987		 * function symbol will be a pointer to .opd, but some
988		 * will point directly to .text.  We don't want to
989		 * translate those.  */
990		if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
991			naddr = addr;
992		} else if (arch_translate_address(lte, addr, &naddr) < 0) {
993			fprintf(stderr,
994				"couldn't translate address of %s@%s: %s\n",
995				name, lib->soname, strerror(errno));
996			continue;
997		}
998
999		char *full_name = strdup(name);
1000		if (full_name == NULL) {
1001			fprintf(stderr, "couldn't copy name of %s@%s: %s\n",
1002				name, lib->soname, strerror(errno));
1003			continue;
1004		}
1005
1006		struct library_symbol *libsym = NULL;
1007		enum plt_status plts
1008			= arch_elf_add_func_entry(proc, lte, &sym,
1009						  naddr, full_name, &libsym);
1010		if (plts == PLT_DEFAULT)
1011			plts = os_elf_add_func_entry(proc, lte, &sym,
1012						     naddr, full_name, &libsym);
1013
1014		switch (plts) {
1015		case PLT_DEFAULT:;
1016			/* Put the default symbol to the chain.  */
1017			struct library_symbol *tmp = malloc(sizeof *tmp);
1018			if (tmp == NULL
1019			    || library_symbol_init(tmp, naddr, full_name, 1,
1020						   LS_TOPLT_NONE) < 0) {
1021				free(tmp);
1022
1023				/* Either add the whole bunch, or none
1024				 * of it.  Note that for PLT_FAIL we
1025				 * don't do this--it's the callee's
1026				 * job to clean up after itself before
1027				 * it bails out.  */
1028				delete_symbol_chain(libsym);
1029				libsym = NULL;
1030
1031		case PLT_FAIL:
1032				fprintf(stderr, "Couldn't add symbol %s@%s "
1033					"for tracing.\n", name, lib->soname);
1034
1035				break;
1036			}
1037
1038			full_name = NULL;
1039			tmp->next = libsym;
1040			libsym = tmp;
1041			break;
1042
1043		case PLT_OK:
1044			break;
1045		}
1046
1047		free(full_name);
1048
1049		struct library_symbol *tmp;
1050		for (tmp = libsym; tmp != NULL; ) {
1051			/* Look whether we already have a symbol for
1052			 * this address.  If not, add this one.  If
1053			 * yes, look if we should pick the new symbol
1054			 * name.  */
1055
1056			struct unique_symbol key = { tmp->enter_addr, NULL };
1057			struct unique_symbol *unique
1058				= lsearch(&key, symbols, &num_symbols,
1059					  sizeof *symbols, &unique_symbol_cmp);
1060
1061			if (unique->libsym == NULL) {
1062				unique->libsym = tmp;
1063				unique->addr = tmp->enter_addr;
1064				tmp = tmp->next;
1065				unique->libsym->next = NULL;
1066			} else {
1067				if (strlen(tmp->name)
1068				    < strlen(unique->libsym->name)) {
1069					library_symbol_set_name
1070						(unique->libsym, tmp->name, 1);
1071					tmp->name = NULL;
1072				}
1073				struct library_symbol *next = tmp->next;
1074				library_symbol_destroy(tmp);
1075				free(tmp);
1076				tmp = next;
1077			}
1078		}
1079	}
1080
1081	/* Now we do the union of this set of unique symbols with
1082	 * what's already in the library.  */
1083	for (i = 0; i < num_symbols; ++i) {
1084		struct library_symbol *this_sym = symbols[i].libsym;
1085		assert(this_sym != NULL);
1086		struct library_symbol *other
1087			= library_each_symbol(lib, NULL, symbol_with_address,
1088					      &this_sym->enter_addr);
1089		if (other != NULL) {
1090			library_symbol_destroy(this_sym);
1091			free(this_sym);
1092			symbols[i].libsym = NULL;
1093		}
1094	}
1095
1096	for (i = 0; i < num_symbols; ++i)
1097		if (symbols[i].libsym != NULL)
1098			library_add_symbol(lib, symbols[i].libsym);
1099
1100	free(symbols);
1101	return 0;
1102}
1103
1104static int
1105populate_symtab(struct process *proc, const char *filename,
1106		struct ltelf *lte, struct library *lib,
1107		int symtabs, int exports)
1108{
1109	int status;
1110	if (symtabs && lte->symtab != NULL && lte->strtab != NULL
1111	    && (status = populate_this_symtab(proc, filename, lte, lib,
1112					      lte->symtab, lte->strtab,
1113					      lte->symtab_count, NULL)) < 0)
1114		return status;
1115
1116	/* Check whether we want to trace symbols implemented by this
1117	 * library (-l).  */
1118	struct library_exported_name **names = NULL;
1119	if (exports) {
1120		debug(DEBUG_FUNCTION, "-l matches %s", lib->soname);
1121		names = &lib->exported_names;
1122	}
1123
1124	return populate_this_symtab(proc, filename, lte, lib,
1125				    lte->dynsym, lte->dynstr,
1126				    lte->dynsym_count, names);
1127}
1128
1129static int
1130read_module(struct library *lib, struct process *proc,
1131	    const char *filename, GElf_Addr bias, int main)
1132{
1133	struct ltelf lte;
1134	if (ltelf_init(&lte, filename) < 0)
1135		return -1;
1136
1137	/* XXX When we abstract ABI into a module, this should instead
1138	 * become something like
1139	 *
1140	 *    proc->abi = arch_get_abi(lte.ehdr);
1141	 *
1142	 * The code in ltelf_init needs to be replaced by this logic.
1143	 * Be warned that libltrace.c calls ltelf_init as well to
1144	 * determine whether ABI is supported.  This is to get
1145	 * reasonable error messages when trying to run 64-bit binary
1146	 * with 32-bit ltrace.  It is desirable to preserve this.  */
1147	proc->e_machine = lte.ehdr.e_machine;
1148	proc->e_class = lte.ehdr.e_ident[EI_CLASS];
1149	get_arch_dep(proc);
1150
1151	/* Find out the base address.  For PIE main binaries we look
1152	 * into auxv, otherwise we scan phdrs.  */
1153	if (main && lte.ehdr.e_type == ET_DYN) {
1154		arch_addr_t entry;
1155		if (process_get_entry(proc, &entry, NULL) < 0) {
1156			fprintf(stderr, "Couldn't find entry of PIE %s\n",
1157				filename);
1158		fail:
1159			ltelf_destroy(&lte);
1160			return -1;
1161		}
1162		/* XXX The double cast should be removed when
1163		 * arch_addr_t becomes integral type.  */
1164		lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
1165		lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1166
1167	} else {
1168		GElf_Phdr phdr;
1169		size_t i;
1170		for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1171			if (phdr.p_type == PT_LOAD) {
1172				lte.base_addr = phdr.p_vaddr + bias;
1173				break;
1174			}
1175		}
1176
1177		lte.bias = bias;
1178		lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1179
1180		if (lte.base_addr == 0) {
1181			fprintf(stderr,
1182				"Couldn't determine base address of %s\n",
1183				filename);
1184			goto fail;
1185		}
1186	}
1187
1188	if (ltelf_read_elf(&lte, filename) < 0)
1189		goto fail;
1190
1191	if (arch_elf_init(&lte, lib) < 0) {
1192		fprintf(stderr, "Backend initialization failed.\n");
1193		goto fail;
1194	}
1195
1196	if (lib == NULL)
1197		goto fail;
1198
1199	/* Note that we set soname and pathname as soon as they are
1200	 * allocated, so in case of further errors, this get released
1201	 * when LIB is released, which should happen in the caller
1202	 * when we return error.  */
1203
1204	if (lib->pathname == NULL) {
1205		char *pathname = strdup(filename);
1206		if (pathname == NULL)
1207			goto fail;
1208		library_set_pathname(lib, pathname, 1);
1209	}
1210
1211	if (lte.soname != NULL) {
1212		char *soname = strdup(lte.soname);
1213		if (soname == NULL)
1214			goto fail;
1215		library_set_soname(lib, soname, 1);
1216	} else {
1217		const char *soname = rindex(lib->pathname, '/');
1218		if (soname != NULL)
1219			soname += 1;
1220		else
1221			soname = lib->pathname;
1222		library_set_soname(lib, soname, 0);
1223	}
1224
1225	/* XXX The double cast should be removed when
1226	 * arch_addr_t becomes integral type.  */
1227	arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1228	if (arch_translate_address(&lte, entry, &entry) < 0)
1229		goto fail;
1230
1231	/* XXX The double cast should be removed when
1232	 * arch_addr_t becomes integral type.  */
1233	lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1234	lib->entry = entry;
1235	/* XXX The double cast should be removed when
1236	 * arch_addr_t becomes integral type.  */
1237	lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1238
1239	/* There are two reasons that we need to inspect symbol tables
1240	 * or populate PLT entries.  Either the user requested
1241	 * corresponding tracing features (respectively -x and -e), or
1242	 * they requested tracing exported symbols (-l).
1243	 *
1244	 * In the latter case we need to keep even those PLT slots
1245	 * that are not requested by -e (but we keep them latent).  We
1246	 * also need to inspect .dynsym to find what exports this
1247	 * library provide, to turn on existing latent PLT
1248	 * entries.  */
1249
1250	int plts = filter_matches_library(options.plt_filter, lib);
1251	if ((plts || options.export_filter != NULL)
1252	    && populate_plt(proc, filename, &lte, lib) < 0)
1253		goto fail;
1254
1255	int exports = filter_matches_library(options.export_filter, lib);
1256	int symtabs = filter_matches_library(options.static_filter, lib);
1257	if ((symtabs || exports)
1258	    && populate_symtab(proc, filename, &lte, lib,
1259			       symtabs, exports) < 0)
1260		goto fail;
1261
1262	arch_elf_destroy(&lte);
1263	ltelf_destroy(&lte);
1264	return 0;
1265}
1266
1267int
1268ltelf_read_library(struct library *lib, struct process *proc,
1269		   const char *filename, GElf_Addr bias)
1270{
1271	return read_module(lib, proc, filename, bias, 0);
1272}
1273
1274
1275struct library *
1276ltelf_read_main_binary(struct process *proc, const char *path)
1277{
1278	struct library *lib = malloc(sizeof(*lib));
1279	if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1280		free(lib);
1281		return NULL;
1282	}
1283	library_set_pathname(lib, path, 0);
1284
1285	/* There is a race between running the process and reading its
1286	 * binary for internal consumption.  So open the binary from
1287	 * the /proc filesystem.  XXX Note that there is similar race
1288	 * for libraries, but there we don't have a nice answer like
1289	 * that.  Presumably we could read the DSOs from the process
1290	 * memory image, but that's not currently done.  */
1291	char *fname = pid2name(proc->pid);
1292	if (fname == NULL
1293	    || read_module(lib, proc, fname, 0, 1) < 0) {
1294		library_destroy(lib);
1295		free(lib);
1296		lib = NULL;
1297	}
1298
1299	free(fname);
1300	return lib;
1301}
1302