dwfl_segment_report_module.c revision 387654d501eabd17d46e1a7d2a2f27388ed52943
1/* Sniff out modules from ELF headers visible in memory segments.
2   Copyright (C) 2008-2012 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29#include <config.h>
30#include "../libelf/libelfP.h"	/* For NOTE_ALIGN.  */
31#undef	_
32#include "libdwflP.h"
33
34#include <elf.h>
35#include <gelf.h>
36#include <inttypes.h>
37#include <sys/param.h>
38#include <alloca.h>
39#include <endian.h>
40
41
/* A good size for the initial read from memory, if it's not too costly.
   This more than covers the phdrs and note segment in the average 64-bit
   binary.  It is only the starting value of the available-size argument
   passed to the memory callback for the first read at the candidate
   module's start address; pieces that turn out not to be covered by
   that first buffer are fetched with separate reads.  */

#define INITIAL_READ	1024
47
/* The ELF data encoding (an e_ident[EI_DATA] value) that matches the
   byte order this file is compiled for.  Note data whose encoding
   equals this is used in place; anything else is converted first.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define MY_ELFDATA	ELFDATA2LSB
#else
# define MY_ELFDATA	ELFDATA2MSB
#endif
53
54
55/* Return user segment index closest to ADDR but not above it.
56   If NEXT, return the closest to ADDR but not below it.  */
57static int
58addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr, bool next)
59{
60  int ndx = -1;
61  do
62    {
63      if (dwfl->lookup_segndx[segment] >= 0)
64	ndx = dwfl->lookup_segndx[segment];
65      if (++segment >= dwfl->lookup_elts - 1)
66	return next ? ndx + 1 : ndx;
67    }
68  while (dwfl->lookup_addr[segment] < addr);
69
70  if (next)
71    {
72      while (dwfl->lookup_segndx[segment] < 0)
73	if (++segment >= dwfl->lookup_elts - 1)
74	  return ndx + 1;
75      ndx = dwfl->lookup_segndx[segment];
76    }
77
78  return ndx;
79}
80
81int
82dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name,
83			    Dwfl_Memory_Callback *memory_callback,
84			    void *memory_callback_arg,
85			    Dwfl_Module_Callback *read_eagerly,
86			    void *read_eagerly_arg)
87{
88  size_t segment = ndx;
89
90  if (segment >= dwfl->lookup_elts)
91    segment = dwfl->lookup_elts - 1;
92
93  while (segment > 0
94	 && (dwfl->lookup_segndx[segment] > ndx
95	     || dwfl->lookup_segndx[segment] == -1))
96    --segment;
97
98  while (dwfl->lookup_segndx[segment] < ndx)
99    if (++segment == dwfl->lookup_elts)
100      return 0;
101
102  GElf_Addr start = dwfl->lookup_addr[segment];
103
104  inline bool segment_read (int segndx,
105			    void **buffer, size_t *buffer_available,
106			    GElf_Addr addr, size_t minread)
107  {
108    return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available,
109				 addr, minread, memory_callback_arg);
110  }
111
112  inline void release_buffer (void **buffer, size_t *buffer_available)
113  {
114    if (*buffer != NULL)
115      (void) segment_read (-1, buffer, buffer_available, 0, 0);
116  }
117
118  /* First read in the file header and check its sanity.  */
119
120  void *buffer = NULL;
121  size_t buffer_available = INITIAL_READ;
122
123  inline int finish (void)
124  {
125    release_buffer (&buffer, &buffer_available);
126    return ndx;
127  }
128
129  if (segment_read (ndx, &buffer, &buffer_available,
130		    start, sizeof (Elf64_Ehdr))
131      || memcmp (buffer, ELFMAG, SELFMAG) != 0)
132    return finish ();
133
134  inline bool read_portion (void **data, size_t *data_size,
135			    GElf_Addr vaddr, size_t filesz)
136  {
137    if (vaddr - start + filesz > buffer_available
138	/* If we're in string mode, then don't consider the buffer we have
139	   sufficient unless it contains the terminator of the string.  */
140	|| (filesz == 0 && memchr (vaddr - start + buffer, '\0',
141				   buffer_available - (vaddr - start)) == NULL))
142      {
143	*data = NULL;
144	*data_size = filesz;
145	return segment_read (addr_segndx (dwfl, segment, vaddr, false),
146			     data, data_size, vaddr, filesz);
147      }
148
149    /* We already have this whole note segment from our initial read.  */
150    *data = vaddr - start + buffer;
151    *data_size = 0;
152    return false;
153  }
154
155  inline void finish_portion (void **data, size_t *data_size)
156  {
157    if (*data_size != 0)
158      release_buffer (data, data_size);
159  }
160
161  /* Extract the information we need from the file header.  */
162  union
163  {
164    Elf32_Ehdr e32;
165    Elf64_Ehdr e64;
166  } ehdr;
167  GElf_Off phoff;
168  uint_fast16_t phnum;
169  uint_fast16_t phentsize;
170  GElf_Off shdrs_end;
171  Elf_Data xlatefrom =
172    {
173      .d_type = ELF_T_EHDR,
174      .d_buf = (void *) buffer,
175      .d_version = EV_CURRENT,
176    };
177  Elf_Data xlateto =
178    {
179      .d_type = ELF_T_EHDR,
180      .d_buf = &ehdr,
181      .d_size = sizeof ehdr,
182      .d_version = EV_CURRENT,
183    };
184  switch (((const unsigned char *) buffer)[EI_CLASS])
185    {
186    case ELFCLASS32:
187      xlatefrom.d_size = sizeof (Elf32_Ehdr);
188      if (elf32_xlatetom (&xlateto, &xlatefrom,
189			  ((const unsigned char *) buffer)[EI_DATA]) == NULL)
190	return finish ();
191      phoff = ehdr.e32.e_phoff;
192      phnum = ehdr.e32.e_phnum;
193      phentsize = ehdr.e32.e_phentsize;
194      if (phentsize != sizeof (Elf32_Phdr))
195	return finish ();
196      shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize;
197      break;
198
199    case ELFCLASS64:
200      xlatefrom.d_size = sizeof (Elf64_Ehdr);
201      if (elf64_xlatetom (&xlateto, &xlatefrom,
202			  ((const unsigned char *) buffer)[EI_DATA]) == NULL)
203	return finish ();
204      phoff = ehdr.e64.e_phoff;
205      phnum = ehdr.e64.e_phnum;
206      phentsize = ehdr.e64.e_phentsize;
207      if (phentsize != sizeof (Elf64_Phdr))
208	return finish ();
209      shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize;
210      break;
211
212    default:
213      return finish ();
214    }
215
216  /* The file header tells where to find the program headers.
217     These are what we need to find the boundaries of the module.
218     Without them, we don't have a module to report.  */
219
220  if (phnum == 0)
221    return finish ();
222
223  xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR;
224  xlatefrom.d_size = phnum * phentsize;
225
226  void *ph_buffer = NULL;
227  size_t ph_buffer_size = 0;
228  if (read_portion (&ph_buffer, &ph_buffer_size,
229		    start + phoff, xlatefrom.d_size))
230    return finish ();
231
232  xlatefrom.d_buf = ph_buffer;
233
234  union
235  {
236    Elf32_Phdr p32[phnum];
237    Elf64_Phdr p64[phnum];
238  } phdrs;
239
240  xlateto.d_buf = &phdrs;
241  xlateto.d_size = sizeof phdrs;
242
243  /* Track the bounds of the file visible in memory.  */
244  GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end.  */
245  GElf_Off file_end = 0;	 /* Rounded up to effective page size.  */
246  GElf_Off contiguous = 0;	 /* Visible as contiguous file from START.  */
247  GElf_Off total_filesz = 0;	 /* Total size of data to read.  */
248
249  /* Collect the bias between START and the containing PT_LOAD's p_vaddr.  */
250  GElf_Addr bias = 0;
251  bool found_bias = false;
252
253  /* Collect the unbiased bounds of the module here.  */
254  GElf_Addr module_start = -1l;
255  GElf_Addr module_end = 0;
256  GElf_Addr module_address_sync = 0;
257
258  /* If we see PT_DYNAMIC, record it here.  */
259  GElf_Addr dyn_vaddr = 0;
260  GElf_Xword dyn_filesz = 0;
261
262  /* Collect the build ID bits here.  */
263  void *build_id = NULL;
264  size_t build_id_len = 0;
265  GElf_Addr build_id_vaddr = 0;
266
267  /* Consider a PT_NOTE we've found in the image.  */
268  inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz)
269  {
270    /* If we have already seen a build ID, we don't care any more.  */
271    if (build_id != NULL || filesz == 0)
272      return;
273
274    void *data;
275    size_t data_size;
276    if (read_portion (&data, &data_size, vaddr, filesz))
277      return;
278
279    assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr));
280
281    void *notes;
282    if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA)
283      notes = data;
284    else
285      {
286	notes = malloc (filesz);
287	if (unlikely (notes == NULL))
288	  return;
289	xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR;
290	xlatefrom.d_buf = (void *) data;
291	xlatefrom.d_size = filesz;
292	xlateto.d_buf = notes;
293	xlateto.d_size = filesz;
294	if (elf32_xlatetom (&xlateto, &xlatefrom,
295			    ehdr.e32.e_ident[EI_DATA]) == NULL)
296	  goto done;
297      }
298
299    const GElf_Nhdr *nh = notes;
300    while ((const void *) nh < (const void *) notes + filesz)
301     {
302	const void *note_name = nh + 1;
303	const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz);
304	if (unlikely ((size_t) ((const void *) notes + filesz
305				- note_desc) < nh->n_descsz))
306	  break;
307
308	if (nh->n_type == NT_GNU_BUILD_ID
309	    && nh->n_descsz > 0
310	    && nh->n_namesz == sizeof "GNU"
311	    && !memcmp (note_name, "GNU", sizeof "GNU"))
312	  {
313	    build_id_vaddr = note_desc - (const void *) notes + vaddr;
314	    build_id_len = nh->n_descsz;
315	    build_id = malloc (nh->n_descsz);
316	    if (likely (build_id != NULL))
317	      memcpy (build_id, note_desc, build_id_len);
318	    break;
319	  }
320
321	nh = note_desc + NOTE_ALIGN (nh->n_descsz);
322      }
323
324  done:
325    if (notes != data)
326      free (notes);
327    finish_portion (&data, &data_size);
328  }
329
330  /* Consider each of the program headers we've read from the image.  */
331  inline void consider_phdr (GElf_Word type,
332			     GElf_Addr vaddr, GElf_Xword memsz,
333			     GElf_Off offset, GElf_Xword filesz,
334			     GElf_Xword align)
335  {
336    switch (type)
337      {
338      case PT_DYNAMIC:
339	dyn_vaddr = vaddr;
340	dyn_filesz = filesz;
341	break;
342
343      case PT_NOTE:
344	/* We calculate from the p_offset of the note segment,
345	   because we don't yet know the bias for its p_vaddr.  */
346	consider_notes (start + offset, filesz);
347	break;
348
349      case PT_LOAD:
350	align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1;
351
352	GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align;
353	GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end;
354	GElf_Off filesz_offset = filesz_vaddr - vaddr + offset;
355
356	if (file_trimmed_end < offset + filesz)
357	  {
358	    file_trimmed_end = offset + filesz;
359
360	    /* Trim the last segment so we don't bother with zeros
361	       in the last page that are off the end of the file.
362	       However, if the extra bit in that page includes the
363	       section headers, keep them.  */
364	    if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end)
365	      {
366		filesz += shdrs_end - file_trimmed_end;
367		file_trimmed_end = shdrs_end;
368	      }
369	  }
370
371	total_filesz += filesz;
372
373	if (file_end < filesz_offset)
374	  {
375	    file_end = filesz_offset;
376	    if (filesz_vaddr - start == filesz_offset)
377	      contiguous = file_end;
378	  }
379
380	if (!found_bias && (offset & -align) == 0
381	    && likely (filesz_offset >= phoff + phnum * phentsize))
382	  {
383	    bias = start - vaddr;
384	    found_bias = true;
385	  }
386
387	if ((vaddr & -align) < module_start)
388	  {
389	    module_start = vaddr & -align;
390	    module_address_sync = vaddr + memsz;
391	  }
392
393	if (module_end < vaddr_end)
394	  module_end = vaddr_end;
395	break;
396      }
397  }
398  if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
399    {
400      if (elf32_xlatetom (&xlateto, &xlatefrom,
401			  ehdr.e32.e_ident[EI_DATA]) == NULL)
402	found_bias = false;	/* Trigger error check.  */
403      else
404	for (uint_fast16_t i = 0; i < phnum; ++i)
405	  consider_phdr (phdrs.p32[i].p_type,
406			 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz,
407			 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz,
408			 phdrs.p32[i].p_align);
409    }
410  else
411    {
412      if (elf64_xlatetom (&xlateto, &xlatefrom,
413			  ehdr.e32.e_ident[EI_DATA]) == NULL)
414	found_bias = false;	/* Trigger error check.  */
415      else
416	for (uint_fast16_t i = 0; i < phnum; ++i)
417	  consider_phdr (phdrs.p64[i].p_type,
418			 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz,
419			 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz,
420			 phdrs.p64[i].p_align);
421    }
422
423  finish_portion (&ph_buffer, &ph_buffer_size);
424
425  /* We must have seen the segment covering offset 0, or else the ELF
426     header we read at START was not produced by these program headers.  */
427  if (unlikely (!found_bias))
428    return finish ();
429
430  /* Now we know enough to report a module for sure: its bounds.  */
431  module_start += bias;
432  module_end += bias;
433
434  dyn_vaddr += bias;
435
436  /* Our return value now says to skip the segments contained
437     within the module.  */
438  ndx = addr_segndx (dwfl, segment, module_end, true);
439
440  /* Examine its .dynamic section to get more interesting details.
441     If it has DT_SONAME, we'll use that as the module name.
442     If it has a DT_DEBUG, then it's actually a PIE rather than a DSO.
443     We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME,
444     and they also tell us the essential portion of the file
445     for fetching symbols.  */
446  GElf_Addr soname_stroff = 0;
447  GElf_Addr dynstr_vaddr = 0;
448  GElf_Xword dynstrsz = 0;
449  bool execlike = false;
450  inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val)
451  {
452    switch (tag)
453      {
454      default:
455	return false;
456
457      case DT_DEBUG:
458	execlike = true;
459	break;
460
461      case DT_SONAME:
462	soname_stroff = val;
463	break;
464
465      case DT_STRTAB:
466	dynstr_vaddr = val;
467	break;
468
469      case DT_STRSZ:
470	dynstrsz = val;
471	break;
472      }
473
474    return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0;
475  }
476
477  const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32
478			      ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn));
479  void *dyn_data = NULL;
480  size_t dyn_data_size = 0;
481  if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0
482      && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz))
483    {
484      union
485      {
486	Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)];
487	Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)];
488      } dyn;
489
490      xlatefrom.d_type = xlateto.d_type = ELF_T_DYN;
491      xlatefrom.d_buf = (void *) dyn_data;
492      xlatefrom.d_size = dyn_filesz;
493      xlateto.d_buf = &dyn;
494      xlateto.d_size = sizeof dyn;
495
496      if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
497	{
498	  if (elf32_xlatetom (&xlateto, &xlatefrom,
499			      ehdr.e32.e_ident[EI_DATA]) != NULL)
500	    for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i)
501	      if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val))
502		break;
503	}
504      else
505	{
506	  if (elf64_xlatetom (&xlateto, &xlatefrom,
507			      ehdr.e32.e_ident[EI_DATA]) != NULL)
508	    for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i)
509	      if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val))
510		break;
511	}
512    }
513  finish_portion (&dyn_data, &dyn_data_size);
514
515  /* We'll use the name passed in or a stupid default if not DT_SONAME.  */
516  if (name == NULL)
517    name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : execlike ? "[pie]" : "[dso]";
518
519  void *soname = NULL;
520  size_t soname_size = 0;
521  if (dynstrsz != 0 && dynstr_vaddr != 0)
522    {
523      /* We know the bounds of the .dynstr section.
524
525	 The DYNSTR_VADDR pointer comes from the .dynamic section
526	 (DT_STRTAB, detected above).  Ordinarily the dynamic linker
527	 will have adjusted this pointer in place so it's now an
528	 absolute address.  But sometimes .dynamic is read-only (in
529	 vDSOs and odd architectures), and sometimes the adjustment
530	 just hasn't happened yet in the memory image we looked at.
531	 So treat DYNSTR_VADDR as an absolute address if it falls
532	 within the module bounds, or try applying the phdr bias
533	 when that adjusts it to fall within the module bounds.  */
534
535      if ((dynstr_vaddr < module_start || dynstr_vaddr >= module_end)
536	  && dynstr_vaddr + bias >= module_start
537	  && dynstr_vaddr + bias < module_end)
538	dynstr_vaddr += bias;
539
540      if (unlikely (dynstr_vaddr + dynstrsz > module_end))
541	dynstrsz = 0;
542
543      /* Try to get the DT_SONAME string.  */
544      if (soname_stroff != 0 && soname_stroff + 1 < dynstrsz
545	  && ! read_portion (&soname, &soname_size,
546			     dynstr_vaddr + soname_stroff, 0))
547	name = soname;
548    }
549
550  /* Now that we have chosen the module's name and bounds, report it.
551     If we found a build ID, report that too.  */
552
553  Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name,
554						 module_start, module_end);
555  if (likely (mod != NULL) && build_id != NULL
556      && unlikely (INTUSE(dwfl_module_report_build_id) (mod,
557							build_id,
558							build_id_len,
559							build_id_vaddr)))
560    {
561      mod->gc = true;
562      mod = NULL;
563    }
564
565  /* At this point we do not need BUILD_ID or NAME any more.
566     They have been copied.  */
567  free (build_id);
568  finish_portion (&soname, &soname_size);
569
570  if (unlikely (mod == NULL))
571    {
572      ndx = -1;
573      return finish ();
574    }
575
576  /* We have reported the module.  Now let the caller decide whether we
577     should read the whole thing in right now.  */
578
579  const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz
580			 : buffer_available >= contiguous ? 0
581			 : contiguous - buffer_available);
582  const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0
583			       : dynstr_vaddr + dynstrsz - start);
584  const GElf_Off whole = MAX (file_trimmed_end, shdrs_end);
585
586  Elf *elf = NULL;
587  if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available,
588		       cost, worthwhile, whole, contiguous,
589		       read_eagerly_arg, &elf)
590      && elf == NULL)
591    {
592      /* The caller wants to read the whole file in right now, but hasn't
593	 done it for us.  Fill in a local image of the virtual file.  */
594
595      void *contents = calloc (1, file_trimmed_end);
596      if (unlikely (contents == NULL))
597	return finish ();
598
599      inline void final_read (size_t offset, GElf_Addr vaddr, size_t size)
600      {
601	void *into = contents + offset;
602	size_t read_size = size;
603	(void) segment_read (addr_segndx (dwfl, segment, vaddr, false),
604			     &into, &read_size, vaddr, size);
605      }
606
607      if (contiguous < file_trimmed_end)
608	{
609	  /* We can't use the memory image verbatim as the file image.
610	     So we'll be reading into a local image of the virtual file.  */
611
612	  inline void read_phdr (GElf_Word type, GElf_Addr vaddr,
613				 GElf_Off offset, GElf_Xword filesz)
614	  {
615	    if (type == PT_LOAD)
616	      final_read (offset, vaddr + bias, filesz);
617	  }
618
619	  if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
620	    for (uint_fast16_t i = 0; i < phnum; ++i)
621	      read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr,
622			 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz);
623	  else
624	    for (uint_fast16_t i = 0; i < phnum; ++i)
625	      read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr,
626			 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz);
627	}
628      else
629	{
630	  /* The whole file sits contiguous in memory,
631	     but the caller didn't want to just do it.  */
632
633	  const size_t have = MIN (buffer_available, file_trimmed_end);
634	  memcpy (contents, buffer, have);
635
636	  if (have < file_trimmed_end)
637	    final_read (have, start + have, file_trimmed_end - have);
638	}
639
640      elf = elf_memory (contents, file_trimmed_end);
641      if (unlikely (elf == NULL))
642	free (contents);
643      else
644	elf->flags |= ELF_F_MALLOCED;
645    }
646
647  if (elf != NULL)
648    {
649      /* Install the file in the module.  */
650      mod->main.elf = elf;
651      mod->main.vaddr = module_start - bias;
652      mod->main.address_sync = module_address_sync;
653      mod->main_bias = bias;
654    }
655
656  return finish ();
657}
658