ltrace-elf.c revision b420a226cd2fc5d6028adcaf236c512a1f1fb437
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
4 * Copyright (C) 2010 Zachary T Welch, CodeSourcery
5 * Copyright (C) 2010 Joe Damato
6 * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
7 * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
8 * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
9 * Copyright (C) 2006 Paul Gilliam, IBM Corporation
10 * Copyright (C) 2006 Ian Wienand
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 * 02110-1301 USA
26 */
27
28#include "config.h"
29
30#include <assert.h>
31#ifdef	__linux__
32#include <endian.h>
33#endif
34#include <errno.h>
35#include <fcntl.h>
36#include <gelf.h>
37#include <inttypes.h>
38#include <search.h>
39#include <stdint.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <strings.h>
44#include <unistd.h>
45
46#include "backend.h"
47#include "filter.h"
48#include "library.h"
49#include "ltrace-elf.h"
50#include "proc.h"
51#include "debug.h"
52#include "options.h"
53
54#ifndef ARCH_HAVE_LTELF_DATA
/* Fallback backend hook, compiled only when the architecture does not
 * define ARCH_HAVE_LTELF_DATA.  There is no backend-specific state to
 * set up in that case, so it always reports success (0).  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
	(void)lte;
	(void)lib;
	return 0;
}
60
/* Fallback counterpart of arch_elf_init: the default init sets nothing
 * up, so there is nothing to release here.  */
void
arch_elf_destroy(struct ltelf *lte)
{
	(void)lte;
}
65#endif
66
67int
68default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
69			  const char *a_name, GElf_Rela *rela, size_t ndx,
70			  struct library_symbol **ret)
71{
72	char *name = strdup(a_name);
73	if (name == NULL) {
74	fail_message:
75		fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
76			strerror(errno));
77	fail:
78		free(name);
79		return -1;
80	}
81
82	GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
83
84	struct library_symbol *libsym = malloc(sizeof(*libsym));
85	if (libsym == NULL)
86		goto fail_message;
87
88	/* XXX The double cast should be removed when
89	 * arch_addr_t becomes integral type.  */
90	arch_addr_t taddr = (arch_addr_t)
91		(uintptr_t)(addr + lte->bias);
92
93	if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
94		free(libsym);
95		goto fail;
96	}
97
98	libsym->next = *ret;
99	*ret = libsym;
100	return 0;
101}
102
103#ifndef ARCH_HAVE_ADD_PLT_ENTRY
104enum plt_status
105arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
106		       const char *a_name, GElf_Rela *rela, size_t ndx,
107		       struct library_symbol **ret)
108{
109	return PLT_DEFAULT;
110}
111#endif
112
113Elf_Data *
114elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
115{
116	Elf_Data *data = elf_getdata(scn, NULL);
117	if (data == NULL || elf_getdata(scn, data) != NULL
118	    || data->d_off || data->d_size != shdr->sh_size)
119		return NULL;
120	return data;
121}
122
123static int
124elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
125		   int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
126		   void *data)
127{
128	int i;
129	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
130		Elf_Scn *scn;
131		GElf_Shdr shdr;
132
133		scn = elf_getscn(lte->elf, i);
134		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
135			debug(1, "Couldn't read section or header.");
136			return -1;
137		}
138		if (predicate(scn, &shdr, data)) {
139			*tgt_sec = scn;
140			*tgt_shdr = shdr;
141			return 0;
142		}
143	}
144
145	*tgt_sec = NULL;
146	return 0;
147}
148
149static int
150inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
151{
152	GElf_Addr addr = *(GElf_Addr *)data;
153	return addr >= shdr->sh_addr
154		&& addr < shdr->sh_addr + shdr->sh_size;
155}
156
157int
158elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
159			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
160{
161	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
162				  &inside_p, &addr);
163}
164
165static int
166type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
167{
168	GElf_Word type = *(GElf_Word *)data;
169	return shdr->sh_type == type;
170}
171
172int
173elf_get_section_type(struct ltelf *lte, GElf_Word type,
174		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
175{
176	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
177				  &type_p, &type);
178}
179
/* Argument bundle passed through the void * of the name_p section
 * predicate.  */
struct section_named_data {
	/* ELF whose section header string table is consulted.  */
	struct ltelf *lte;
	/* Section name being searched for.  */
	const char *name;
};
184
185static int
186name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
187{
188	struct section_named_data *data = d;
189	const char *name = elf_strptr(data->lte->elf,
190				      data->lte->ehdr.e_shstrndx,
191				      shdr->sh_name);
192	return strcmp(name, data->name) == 0;
193}
194
195int
196elf_get_section_named(struct ltelf *lte, const char *name,
197		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
198{
199	struct section_named_data data = {
200		.lte = lte,
201		.name = name,
202	};
203	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
204				  &name_p, &data);
205}
206
207static struct elf_each_symbol_t
208each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
209	       unsigned i,
210	       enum callback_status (*cb)(GElf_Sym *symbol,
211					  const char *name, void *data),
212	       void *data)
213{
214	for (; i < count; ++i) {
215		GElf_Sym sym;
216		if (gelf_getsym(symtab, i, &sym) == NULL)
217			return (struct elf_each_symbol_t){ i, -2 };
218
219		switch (cb(&sym, strtab + sym.st_name, data)) {
220		case CBS_FAIL:
221			return (struct elf_each_symbol_t){ i, -1 };
222		case CBS_STOP:
223			return (struct elf_each_symbol_t){ i + 1, 0 };
224		case CBS_CONT:
225			break;
226		}
227	}
228
229	return (struct elf_each_symbol_t){ 0, 0 };
230}
231
232/* N.B.: gelf_getsym takes integer argument.  Since negative values
233 * are invalid as indices, we can use the extra bit to encode which
234 * symbol table we are looking into.  ltrace currently doesn't handle
235 * more than two symbol tables anyway, nor does it handle the xindex
236 * stuff.  */
237struct elf_each_symbol_t
238elf_each_symbol(struct ltelf *lte, unsigned start_after,
239		enum callback_status (*cb)(GElf_Sym *symbol,
240					   const char *name, void *data),
241		void *data)
242{
243	unsigned index = start_after == 0 ? 0 : start_after >> 1;
244
245	/* Go through static symbol table first.  */
246	if ((start_after & 0x1) == 0) {
247		struct elf_each_symbol_t st
248			= each_symbol_in(lte->symtab, lte->strtab,
249					 lte->symtab_count, index, cb, data);
250
251		/* If the iteration stopped prematurely, bail out.  */
252		if (st.restart != 0)
253			return ((struct elf_each_symbol_t)
254				{ st.restart << 1, st.status });
255	}
256
257	struct elf_each_symbol_t st
258		= each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
259				 index, cb, data);
260	if (st.restart != 0)
261		return ((struct elf_each_symbol_t)
262			{ st.restart << 1 | 0x1, st.status });
263
264	return (struct elf_each_symbol_t){ 0, 0 };
265}
266
267int
268elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
269{
270	assert(data != NULL);
271	if (data->d_size < size || offset > data->d_size - size) {
272		debug(1, "Not enough data to read %"PRId64"-byte value"
273		      " at offset %"PRId64".", size, offset);
274		return 0;
275	}
276	return 1;
277}
278
279#define DEF_READER(NAME, SIZE)						\
280	int								\
281	NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)	\
282	{								\
283		if (!elf_can_read_next(data, offset, SIZE / 8))		\
284			return -1;					\
285									\
286		if (data->d_buf == NULL) /* NODATA section */ {		\
287			*retp = 0;					\
288			return 0;					\
289		}							\
290									\
291		union {							\
292			uint##SIZE##_t dst;				\
293			char buf[0];					\
294		} u;							\
295		memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));	\
296		*retp = u.dst;						\
297		return 0;						\
298	}
299
300DEF_READER(elf_read_u8, 8)
301DEF_READER(elf_read_u16, 16)
302DEF_READER(elf_read_u32, 32)
303DEF_READER(elf_read_u64, 64)
304
305#undef DEF_READER
306
307#define DEF_READER(NAME, SIZE)						\
308	int								\
309	NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)	\
310	{								\
311		int rc = elf_read_u##SIZE(data, *offset, retp);		\
312		if (rc < 0)						\
313			return rc;					\
314		*offset += SIZE / 8;					\
315		return 0;						\
316	}
317
318DEF_READER(elf_read_next_u8, 8)
319DEF_READER(elf_read_next_u16, 16)
320DEF_READER(elf_read_next_u32, 32)
321DEF_READER(elf_read_next_u64, 64)
322
323#undef DEF_READER
324
325int
326elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
327{
328	uint64_t result = 0;
329	int shift = 0;
330	int size = 8 * sizeof result;
331
332	while (1) {
333		uint8_t byte;
334		if (elf_read_next_u8(data, offset, &byte) < 0)
335			return -1;
336
337		uint8_t payload = byte & 0x7f;
338		result |= (uint64_t)payload << shift;
339		shift += 7;
340		if (shift > size && byte != 0x1)
341			return -1;
342		if ((byte & 0x80) == 0)
343			break;
344	}
345
346	if (retp != NULL)
347		*retp = result;
348	return 0;
349}
350
351int
352elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
353{
354	return elf_read_next_uleb128(data, &offset, retp);
355}
356
357int
358open_elf(struct ltelf *lte, const char *filename)
359{
360	lte->fd = open(filename, O_RDONLY);
361	if (lte->fd == -1)
362		return 1;
363
364	elf_version(EV_CURRENT);
365
366#ifdef HAVE_ELF_C_READ_MMAP
367	lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
368#else
369	lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
370#endif
371
372	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
373		fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
374		exit(EXIT_FAILURE);
375	}
376
377	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
378		fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
379			filename, elf_errmsg(-1));
380		exit(EXIT_FAILURE);
381	}
382
383	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
384		fprintf(stderr, "\"%s\" is neither an ELF executable"
385			" nor a shared library\n", filename);
386		exit(EXIT_FAILURE);
387	}
388
389	if (1
390#ifdef LT_ELF_MACHINE
391	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
392		|| lte->ehdr.e_machine != LT_ELF_MACHINE)
393#endif
394#ifdef LT_ELF_MACHINE2
395	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
396		|| lte->ehdr.e_machine != LT_ELF_MACHINE2)
397#endif
398#ifdef LT_ELF_MACHINE3
399	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
400		|| lte->ehdr.e_machine != LT_ELF_MACHINE3)
401#endif
402		) {
403		fprintf(stderr,
404			"\"%s\" is ELF from incompatible architecture\n",
405			filename);
406		exit(EXIT_FAILURE);
407	}
408
409	return 0;
410}
411
412static void
413read_symbol_table(struct ltelf *lte, const char *filename,
414		  Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
415		  Elf_Data **datap, size_t *countp, const char **strsp)
416{
417	*datap = elf_getdata(scn, NULL);
418	*countp = shdr->sh_size / shdr->sh_entsize;
419	if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
420	    && options.static_filter != NULL) {
421		fprintf(stderr, "Couldn't get data of section"
422			" %s from \"%s\": %s\n",
423			name, filename, elf_errmsg(-1));
424		exit(EXIT_FAILURE);
425	}
426
427	scn = elf_getscn(lte->elf, shdr->sh_link);
428	GElf_Shdr shdr2;
429	if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
430		fprintf(stderr, "Couldn't get header of section"
431			" #%d from \"%s\": %s\n",
432			shdr->sh_link, filename, elf_errmsg(-1));
433		exit(EXIT_FAILURE);
434	}
435
436	Elf_Data *data = elf_getdata(scn, NULL);
437	if (data == NULL || elf_getdata(scn, data) != NULL
438	    || shdr2.sh_size != data->d_size || data->d_off) {
439		fprintf(stderr, "Couldn't get data of section"
440			" #%d from \"%s\": %s\n",
441			shdr2.sh_link, filename, elf_errmsg(-1));
442		exit(EXIT_FAILURE);
443	}
444
445	*strsp = data->d_buf;
446}
447
448static int
449do_init_elf(struct ltelf *lte, const char *filename)
450{
451	int i;
452	GElf_Addr relplt_addr = 0;
453	GElf_Addr soname_offset = 0;
454
455	debug(DEBUG_FUNCTION, "do_init_elf(filename=%s)", filename);
456	debug(1, "Reading ELF from %s...", filename);
457
458	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
459		Elf_Scn *scn;
460		GElf_Shdr shdr;
461		const char *name;
462
463		scn = elf_getscn(lte->elf, i);
464		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
465			fprintf(stderr,	"Couldn't get section #%d from"
466				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
467			exit(EXIT_FAILURE);
468		}
469
470		name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
471		if (name == NULL) {
472			fprintf(stderr,	"Couldn't get name of section #%d from"
473				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
474			exit(EXIT_FAILURE);
475		}
476
477		if (shdr.sh_type == SHT_SYMTAB) {
478			read_symbol_table(lte, filename,
479					  scn, &shdr, name, &lte->symtab,
480					  &lte->symtab_count, &lte->strtab);
481
482		} else if (shdr.sh_type == SHT_DYNSYM) {
483			read_symbol_table(lte, filename,
484					  scn, &shdr, name, &lte->dynsym,
485					  &lte->dynsym_count, &lte->dynstr);
486
487		} else if (shdr.sh_type == SHT_DYNAMIC) {
488			Elf_Data *data;
489			size_t j;
490
491			lte->dyn_addr = shdr.sh_addr + lte->bias;
492			lte->dyn_sz = shdr.sh_size;
493
494			data = elf_getdata(scn, NULL);
495			if (data == NULL || elf_getdata(scn, data) != NULL) {
496				fprintf(stderr, "Couldn't get .dynamic data"
497					" from \"%s\": %s\n",
498					filename, strerror(errno));
499				exit(EXIT_FAILURE);
500			}
501
502			for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
503				GElf_Dyn dyn;
504
505				if (gelf_getdyn(data, j, &dyn) == NULL) {
506					fprintf(stderr, "Couldn't get .dynamic"
507						" data from \"%s\": %s\n",
508						filename, strerror(errno));
509					exit(EXIT_FAILURE);
510				}
511				if (dyn.d_tag == DT_JMPREL)
512					relplt_addr = dyn.d_un.d_ptr;
513				else if (dyn.d_tag == DT_PLTRELSZ)
514					lte->relplt_size = dyn.d_un.d_val;
515				else if (dyn.d_tag == DT_SONAME)
516					soname_offset = dyn.d_un.d_val;
517			}
518		} else if (shdr.sh_type == SHT_PROGBITS
519			   || shdr.sh_type == SHT_NOBITS) {
520			if (strcmp(name, ".plt") == 0) {
521				lte->plt_addr = shdr.sh_addr;
522				lte->plt_size = shdr.sh_size;
523				lte->plt_data = elf_loaddata(scn, &shdr);
524				if (lte->plt_data == NULL)
525					fprintf(stderr,
526						"Can't load .plt data\n");
527				lte->plt_flags = shdr.sh_flags;
528			}
529#ifdef ARCH_SUPPORTS_OPD
530			else if (strcmp(name, ".opd") == 0) {
531				lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
532				lte->opd_size = shdr.sh_size;
533				lte->opd = elf_rawdata(scn, NULL);
534			}
535#endif
536		}
537	}
538
539	if (lte->dynsym == NULL || lte->dynstr == NULL) {
540		fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
541			filename);
542		exit(EXIT_FAILURE);
543	}
544
545	if (!relplt_addr || !lte->plt_addr) {
546		debug(1, "%s has no PLT relocations", filename);
547		lte->relplt = NULL;
548		lte->relplt_count = 0;
549	} else if (lte->relplt_size == 0) {
550		debug(1, "%s has unknown PLT size", filename);
551		lte->relplt = NULL;
552		lte->relplt_count = 0;
553	} else {
554
555		for (i = 1; i < lte->ehdr.e_shnum; ++i) {
556			Elf_Scn *scn;
557			GElf_Shdr shdr;
558
559			scn = elf_getscn(lte->elf, i);
560			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
561				fprintf(stderr, "Couldn't get section header"
562					" from \"%s\": %s\n",
563					filename, elf_errmsg(-1));
564				exit(EXIT_FAILURE);
565			}
566			if (shdr.sh_addr == relplt_addr
567			    && shdr.sh_size == lte->relplt_size) {
568				lte->relplt = elf_getdata(scn, NULL);
569				lte->relplt_count =
570				    shdr.sh_size / shdr.sh_entsize;
571				if (lte->relplt == NULL
572				    || elf_getdata(scn, lte->relplt) != NULL) {
573					fprintf(stderr, "Couldn't get .rel*.plt"
574						" data from \"%s\": %s\n",
575						filename, elf_errmsg(-1));
576					exit(EXIT_FAILURE);
577				}
578				break;
579			}
580		}
581
582		if (i == lte->ehdr.e_shnum) {
583			fprintf(stderr,
584				"Couldn't find .rel*.plt section in \"%s\"\n",
585				filename);
586			exit(EXIT_FAILURE);
587		}
588
589		debug(1, "%s %zd PLT relocations", filename, lte->relplt_count);
590	}
591
592	if (soname_offset != 0)
593		lte->soname = lte->dynstr + soname_offset;
594
595	return 0;
596}
597
598void
599do_close_elf(struct ltelf *lte)
600{
601	debug(DEBUG_FUNCTION, "do_close_elf()");
602	arch_elf_destroy(lte);
603	elf_end(lte->elf);
604	close(lte->fd);
605}
606
607int
608elf_get_sym_info(struct ltelf *lte, const char *filename,
609		 size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
610{
611	GElf_Rel rel;
612
613	if (lte->relplt->d_type == ELF_T_REL) {
614		if (gelf_getrel(lte->relplt, sym_index, &rel) == NULL)
615			return -1;
616		rela->r_offset = rel.r_offset;
617		rela->r_info = rel.r_info;
618
619		Elf_Scn *sec;
620		GElf_Shdr shdr;
621		if (elf_get_section_covering(lte, rel.r_offset, &sec, &shdr) < 0
622		    || sec == NULL)
623			return -1;
624
625		Elf_Data *data = elf_loaddata(sec, &shdr);
626		if (data == NULL)
627			return -1;
628		GElf_Xword offset = rel.r_offset - shdr.sh_addr - data->d_off;
629		uint64_t value;
630		if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
631			uint32_t tmp;
632			if (elf_read_u32(data, offset, &tmp) < 0)
633				return -1;
634			value = tmp;
635		} else if (elf_read_u64(data, offset, &value) < 0) {
636			return -1;
637		}
638
639		rela->r_addend = value;
640
641	} else if (gelf_getrela(lte->relplt, sym_index, rela) == NULL) {
642		return -1;
643	}
644
645	if (ELF64_R_SYM(rela->r_info) >= lte->dynsym_count
646	    || gelf_getsym(lte->dynsym,
647			   ELF64_R_SYM(rela->r_info), sym) == NULL)
648		return -1;
649
650	return 0;
651}
652
653#ifndef ARCH_HAVE_GET_SYMINFO
654int
655arch_get_sym_info(struct ltelf *lte, const char *filename,
656		  size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
657{
658	return elf_get_sym_info(lte, filename, sym_index, rela, sym);
659}
660#endif
661
662static void
663mark_chain_latent(struct library_symbol *libsym)
664{
665	for (; libsym != NULL; libsym = libsym->next) {
666		debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
667		libsym->latent = 1;
668	}
669}
670
671static void
672filter_symbol_chain(struct filter *filter,
673		    struct library_symbol **libsymp, struct library *lib)
674{
675	assert(libsymp != NULL);
676	struct library_symbol **ptr = libsymp;
677	while (*ptr != NULL) {
678		if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
679			ptr = &(*ptr)->next;
680		} else {
681			struct library_symbol *sym = *ptr;
682			*ptr = (*ptr)->next;
683			library_symbol_destroy(sym);
684			free(sym);
685		}
686	}
687}
688
689static int
690populate_plt(struct process *proc, const char *filename,
691	     struct ltelf *lte, struct library *lib,
692	     int latent_plts)
693{
694	size_t i;
695	for (i = 0; i < lte->relplt_count; ++i) {
696		GElf_Rela rela;
697		GElf_Sym sym;
698
699		switch (arch_get_sym_info(lte, filename, i, &rela, &sym)) {
700		default:
701			fprintf(stderr,
702				"Couldn't get relocation for symbol #%zd"
703				" from \"%s\": %s\n",
704				i, filename, elf_errmsg(-1));
705			/* Fall through.  */
706		case 1:
707			continue; /* Skip this entry.  */
708		case 0:
709			break;
710		}
711
712		char const *name = lte->dynstr + sym.st_name;
713
714		int matched = filter_matches_symbol(options.plt_filter,
715						    name, lib);
716
717		struct library_symbol *libsym = NULL;
718		switch (arch_elf_add_plt_entry(proc, lte, name,
719					       &rela, i, &libsym)) {
720		case PLT_FAIL:
721				return -1;
722
723		case PLT_DEFAULT:
724			/* Add default entry to the beginning of LIBSYM.  */
725			if (default_elf_add_plt_entry(proc, lte, name,
726						      &rela, i, &libsym) < 0)
727				return -1;
728			/* Fall through.  */
729		case PLT_OK:
730			/* If we didn't match the PLT entry up there,
731			 * filter the chain to only include the
732			 * matching symbols (but include all if we are
733			 * adding latent symbols).  This is to allow
734			 * arch_elf_add_plt_entry to override the PLT
735			 * symbol's name.  */
736			if (!matched && !latent_plts)
737				filter_symbol_chain(options.plt_filter,
738						    &libsym, lib);
739			if (libsym != NULL) {
740				/* If we are adding those symbols just
741				 * for tracing exports, mark them all
742				 * latent.  */
743				if (!matched && latent_plts)
744					mark_chain_latent(libsym);
745				library_add_symbol(lib, libsym);
746			}
747		}
748	}
749	return 0;
750}
751
752/* When -x rules result in request to trace several aliases, we only
753 * want to add such symbol once.  The only way that those symbols
754 * differ in is their name, e.g. in glibc you have __GI___libc_free,
755 * __cfree, __free, __libc_free, cfree and free all defined on the
756 * same address.  So instead we keep this unique symbol struct for
757 * each address, and replace name in libsym with a shorter variant if
758 * we find it.  */
759struct unique_symbol {
760	arch_addr_t addr;
761	struct library_symbol *libsym;
762};
763
764static int
765unique_symbol_cmp(const void *key, const void *val)
766{
767	const struct unique_symbol *sym_key = key;
768	const struct unique_symbol *sym_val = val;
769	return sym_key->addr != sym_val->addr;
770}
771
772static enum callback_status
773symbol_with_address(struct library_symbol *sym, void *addrptr)
774{
775	return sym->enter_addr == *(arch_addr_t *)addrptr
776		? CBS_STOP : CBS_CONT;
777}
778
779static int
780populate_this_symtab(struct process *proc, const char *filename,
781		     struct ltelf *lte, struct library *lib,
782		     Elf_Data *symtab, const char *strtab, size_t count,
783		     struct library_exported_name **names)
784{
785	/* If a valid NAMES is passed, we pass in *NAMES a list of
786	 * symbol names that this library exports.  */
787	if (names != NULL)
788		*names = NULL;
789
790	/* Using sorted array would be arguably better, but this
791	 * should be well enough for the number of symbols that we
792	 * typically deal with.  */
793	size_t num_symbols = 0;
794	struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
795	if (symbols == NULL) {
796		fprintf(stderr, "couldn't insert symbols for -x: %s\n",
797			strerror(errno));
798		return -1;
799	}
800
801	GElf_Word secflags[lte->ehdr.e_shnum];
802	size_t i;
803	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
804		Elf_Scn *scn = elf_getscn(lte->elf, i);
805		GElf_Shdr shdr;
806		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
807			secflags[i] = 0;
808		else
809			secflags[i] = shdr.sh_flags;
810	}
811
812	for (i = 0; i < count; ++i) {
813		GElf_Sym sym;
814		if (gelf_getsym(symtab, i, &sym) == NULL) {
815		fail:
816			fprintf(stderr,
817				"couldn't get symbol #%zd from %s: %s\n",
818				i, filename, elf_errmsg(-1));
819			continue;
820		}
821
822		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC
823		    || sym.st_value == 0
824		    || sym.st_shndx == STN_UNDEF)
825			continue;
826
827		/* Find symbol name and snip version.  */
828		const char *orig_name = strtab + sym.st_name;
829		const char *version = strchr(orig_name, '@');
830		size_t len = version != NULL ? (assert(version > orig_name),
831						(size_t)(version - orig_name))
832			: strlen(orig_name);
833		char name[len + 1];
834		memcpy(name, orig_name, len);
835		name[len] = 0;
836
837		/* If we are interested in exports, store this name.  */
838		char *name_copy = NULL;
839		if (names != NULL) {
840			struct library_exported_name *export = NULL;
841			name_copy = strdup(name);
842
843			if (name_copy == NULL
844			    || (export = malloc(sizeof(*export))) == NULL) {
845				free(name_copy);
846				fprintf(stderr, "Couldn't store symbol %s.  "
847					"Tracing may be incomplete.\n", name);
848			} else {
849				export->name = name_copy;
850				export->own_name = 1;
851				export->next = *names;
852				*names = export;
853			}
854		}
855
856		/* If the symbol is not matched, skip it.  We already
857		 * stored it to export list above.  */
858		if (!filter_matches_symbol(options.static_filter, name, lib))
859			continue;
860
861		arch_addr_t addr = (arch_addr_t)
862			(uintptr_t)(sym.st_value + lte->bias);
863		arch_addr_t naddr;
864
865		/* On arches that support OPD, the value of typical
866		 * function symbol will be a pointer to .opd, but some
867		 * will point directly to .text.  We don't want to
868		 * translate those.  */
869		if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
870			naddr = addr;
871		} else if (arch_translate_address(lte, addr, &naddr) < 0) {
872			fprintf(stderr,
873				"couldn't translate address of %s@%s: %s\n",
874				name, lib->soname, strerror(errno));
875			continue;
876		}
877
878		char *full_name;
879		int own_full_name = 1;
880		if (name_copy == NULL) {
881			full_name = strdup(name);
882			if (full_name == NULL)
883				goto fail;
884		} else {
885			full_name = name_copy;
886			own_full_name = 0;
887		}
888
889		/* Look whether we already have a symbol for this
890		 * address.  If not, add this one.  */
891		struct unique_symbol key = { naddr, NULL };
892		struct unique_symbol *unique
893			= lsearch(&key, symbols, &num_symbols,
894				  sizeof(*symbols), &unique_symbol_cmp);
895
896		if (unique->libsym == NULL) {
897			struct library_symbol *libsym = malloc(sizeof(*libsym));
898			if (libsym == NULL
899			    || library_symbol_init(libsym, naddr,
900						   full_name, own_full_name,
901						   LS_TOPLT_NONE) < 0) {
902				--num_symbols;
903				goto fail;
904			}
905			unique->libsym = libsym;
906			unique->addr = naddr;
907
908		} else if (strlen(full_name) < strlen(unique->libsym->name)) {
909			library_symbol_set_name(unique->libsym,
910						full_name, own_full_name);
911
912		} else if (own_full_name) {
913			free(full_name);
914		}
915	}
916
917	/* Now we do the union of this set of unique symbols with
918	 * what's already in the library.  */
919	for (i = 0; i < num_symbols; ++i) {
920		struct library_symbol *this_sym = symbols[i].libsym;
921		assert(this_sym != NULL);
922		struct library_symbol *other
923			= library_each_symbol(lib, NULL, symbol_with_address,
924					      &this_sym->enter_addr);
925		if (other != NULL) {
926			library_symbol_destroy(this_sym);
927			free(this_sym);
928			symbols[i].libsym = NULL;
929		}
930	}
931
932	for (i = 0; i < num_symbols; ++i)
933		if (symbols[i].libsym != NULL)
934			library_add_symbol(lib, symbols[i].libsym);
935
936	free(symbols);
937	return 0;
938}
939
940static int
941populate_symtab(struct process *proc, const char *filename,
942		struct ltelf *lte, struct library *lib,
943		int symtabs, int exports)
944{
945	int status;
946	if (symtabs && lte->symtab != NULL && lte->strtab != NULL
947	    && (status = populate_this_symtab(proc, filename, lte, lib,
948					      lte->symtab, lte->strtab,
949					      lte->symtab_count, NULL)) < 0)
950		return status;
951
952	/* Check whether we want to trace symbols implemented by this
953	 * library (-l).  */
954	struct library_exported_name **names = NULL;
955	if (exports) {
956		debug(DEBUG_FUNCTION, "-l matches %s", lib->soname);
957		names = &lib->exported_names;
958	}
959
960	return populate_this_symtab(proc, filename, lte, lib,
961				    lte->dynsym, lte->dynstr,
962				    lte->dynsym_count, names);
963}
964
965static int
966read_module(struct library *lib, struct process *proc,
967	    const char *filename, GElf_Addr bias, int main)
968{
969	struct ltelf lte = {};
970	if (open_elf(&lte, filename) < 0)
971		return -1;
972
973	/* XXX When we abstract ABI into a module, this should instead
974	 * become something like
975	 *
976	 *    proc->abi = arch_get_abi(lte.ehdr);
977	 *
978	 * The code in open_elf needs to be replaced by this logic.
979	 * Be warned that libltrace.c calls open_elf as well to
980	 * determine whether ABI is supported.  This is to get
981	 * reasonable error messages when trying to run 64-bit binary
982	 * with 32-bit ltrace.  It is desirable to preserve this.  */
983	proc->e_machine = lte.ehdr.e_machine;
984	proc->e_class = lte.ehdr.e_ident[EI_CLASS];
985	get_arch_dep(proc);
986
987	/* Find out the base address.  For PIE main binaries we look
988	 * into auxv, otherwise we scan phdrs.  */
989	if (main && lte.ehdr.e_type == ET_DYN) {
990		arch_addr_t entry;
991		if (process_get_entry(proc, &entry, NULL) < 0) {
992			fprintf(stderr, "Couldn't find entry of PIE %s\n",
993				filename);
994			return -1;
995		}
996		/* XXX The double cast should be removed when
997		 * arch_addr_t becomes integral type.  */
998		lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
999		lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1000
1001	} else {
1002		GElf_Phdr phdr;
1003		size_t i;
1004		for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1005			if (phdr.p_type == PT_LOAD) {
1006				lte.base_addr = phdr.p_vaddr + bias;
1007				break;
1008			}
1009		}
1010
1011		lte.bias = bias;
1012		lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1013
1014		if (lte.base_addr == 0) {
1015			fprintf(stderr,
1016				"Couldn't determine base address of %s\n",
1017				filename);
1018			return -1;
1019		}
1020	}
1021
1022	if (do_init_elf(&lte, filename) < 0)
1023		return -1;
1024
1025	if (arch_elf_init(&lte, lib) < 0) {
1026		fprintf(stderr, "Backend initialization failed.\n");
1027		return -1;
1028	}
1029
1030	int status = 0;
1031	if (lib == NULL)
1032		goto fail;
1033
1034	/* Note that we set soname and pathname as soon as they are
1035	 * allocated, so in case of further errors, this get released
1036	 * when LIB is released, which should happen in the caller
1037	 * when we return error.  */
1038
1039	if (lib->pathname == NULL) {
1040		char *pathname = strdup(filename);
1041		if (pathname == NULL)
1042			goto fail;
1043		library_set_pathname(lib, pathname, 1);
1044	}
1045
1046	if (lte.soname != NULL) {
1047		char *soname = strdup(lte.soname);
1048		if (soname == NULL)
1049			goto fail;
1050		library_set_soname(lib, soname, 1);
1051	} else {
1052		const char *soname = rindex(lib->pathname, '/');
1053		if (soname != NULL)
1054			soname += 1;
1055		else
1056			soname = lib->pathname;
1057		library_set_soname(lib, soname, 0);
1058	}
1059
1060	/* XXX The double cast should be removed when
1061	 * arch_addr_t becomes integral type.  */
1062	arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1063	if (arch_translate_address(&lte, entry, &entry) < 0)
1064		goto fail;
1065
1066	/* XXX The double cast should be removed when
1067	 * arch_addr_t becomes integral type.  */
1068	lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1069	lib->entry = entry;
1070	/* XXX The double cast should be removed when
1071	 * arch_addr_t becomes integral type.  */
1072	lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1073
1074	/* There are two reasons that we need to inspect symbol tables
1075	 * or populate PLT entries.  Either the user requested
1076	 * corresponding tracing features (respectively -x and -e), or
1077	 * they requested tracing exported symbols (-l).
1078	 *
1079	 * In the latter case we need to keep even those PLT slots
1080	 * that are not requested by -e (but we keep them latent).  We
1081	 * also need to inspect .dynsym to find what exports this
1082	 * library provide, to turn on existing latent PLT
1083	 * entries.  */
1084
1085	int plts = filter_matches_library(options.plt_filter, lib);
1086	if ((plts || options.export_filter != NULL)
1087	    && populate_plt(proc, filename, &lte, lib,
1088			    options.export_filter != NULL) < 0)
1089		goto fail;
1090
1091	int exports = filter_matches_library(options.export_filter, lib);
1092	int symtabs = filter_matches_library(options.static_filter, lib);
1093	if ((symtabs || exports)
1094	    && populate_symtab(proc, filename, &lte, lib,
1095			       symtabs, exports) < 0)
1096		goto fail;
1097
1098done:
1099	do_close_elf(&lte);
1100	return status;
1101
1102fail:
1103	status = -1;
1104	goto done;
1105}
1106
1107int
1108ltelf_read_library(struct library *lib, struct process *proc,
1109		   const char *filename, GElf_Addr bias)
1110{
1111	return read_module(lib, proc, filename, bias, 0);
1112}
1113
1114
1115struct library *
1116ltelf_read_main_binary(struct process *proc, const char *path)
1117{
1118	struct library *lib = malloc(sizeof(*lib));
1119	if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1120		free(lib);
1121		return NULL;
1122	}
1123	library_set_pathname(lib, path, 0);
1124
1125	/* There is a race between running the process and reading its
1126	 * binary for internal consumption.  So open the binary from
1127	 * the /proc filesystem.  XXX Note that there is similar race
1128	 * for libraries, but there we don't have a nice answer like
1129	 * that.  Presumably we could read the DSOs from the process
1130	 * memory image, but that's not currently done.  */
1131	char *fname = pid2name(proc->pid);
1132	if (fname == NULL
1133	    || read_module(lib, proc, fname, 0, 1) < 0) {
1134		library_destroy(lib);
1135		free(lib);
1136		lib = NULL;
1137	}
1138
1139	free(fname);
1140	return lib;
1141}
1142