1/* Sniff out modules from ELF headers visible in memory segments.
2   Copyright (C) 2008 Red Hat, Inc.
3   This file is part of Red Hat elfutils.
4
5   Red Hat elfutils is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by the
7   Free Software Foundation; version 2 of the License.
8
9   Red Hat elfutils is distributed in the hope that it will be useful, but
10   WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   General Public License for more details.
13
14   You should have received a copy of the GNU General Public License along
15   with Red Hat elfutils; if not, write to the Free Software Foundation,
16   Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
17
18   In addition, as a special exception, Red Hat, Inc. gives You the
19   additional right to link the code of Red Hat elfutils with code licensed
20   under any Open Source Initiative certified open source license
21   (http://www.opensource.org/licenses/index.php) which requires the
22   distribution of source code with any binary distribution and to
23   distribute linked combinations of the two.  Non-GPL Code permitted under
24   this exception must only link to the code of Red Hat elfutils through
25   those well defined interfaces identified in the file named EXCEPTION
26   found in the source code files (the "Approved Interfaces").  The files
27   of Non-GPL Code may instantiate templates or use macros or inline
28   functions from the Approved Interfaces without causing the resulting
29   work to be covered by the GNU General Public License.  Only Red Hat,
30   Inc. may make changes or additions to the list of Approved Interfaces.
31   Red Hat's grant of this exception is conditioned upon your not adding
32   any new exceptions.  If you wish to add a new Approved Interface or
33   exception, please contact Red Hat.  You must obey the GNU General Public
34   License in all respects for all of the Red Hat elfutils code and other
35   code used in conjunction with Red Hat elfutils except the Non-GPL Code
36   covered by this exception.  If you modify this file, you may extend this
37   exception to your version of the file, but you are not obligated to do
38   so.  If you do not wish to provide this exception without modification,
39   you must delete this exception statement from your version and license
40   this file solely under the GPL without exception.
41
42   Red Hat elfutils is an included package of the Open Invention Network.
43   An included package of the Open Invention Network is a package for which
44   Open Invention Network licensees cross-license their patents.  No patent
45   license is granted, either expressly or impliedly, by designation as an
46   included package.  Should you wish to participate in the Open Invention
47   Network licensing program, please visit www.openinventionnetwork.com
48   <http://www.openinventionnetwork.com>.  */
49
50#include <config.h>
51#include "../libelf/libelfP.h"	/* For NOTE_ALIGN.  */
52#undef	_
53#include "libdwflP.h"
54
55#include <elf.h>
56#include <gelf.h>
57#include <inttypes.h>
58#include <sys/param.h>
59#include <alloca.h>
60#include <endian.h>
61
62
63/* A good size for the initial read from memory, if it's not too costly.
64   This more than covers the phdrs and note segment in the average 64-bit
65   binary.  */
66
67#define INITIAL_READ	1024
68
69#if __BYTE_ORDER == __LITTLE_ENDIAN
70# define MY_ELFDATA	ELFDATA2LSB
71#else
72# define MY_ELFDATA	ELFDATA2MSB
73#endif
74
75
76/* Return user segment index closest to ADDR but not above it.  */
77static int
78addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr)
79{
80  int ndx = dwfl->lookup_segndx[segment];
81  do
82    {
83      if (dwfl->lookup_segndx[segment] >= 0)
84	ndx = dwfl->lookup_segndx[segment];
85      ++segment;
86    }
87  while (segment < dwfl->lookup_elts - 1
88	 && dwfl->lookup_addr[segment] < addr);
89
90  while (dwfl->lookup_segndx[segment] < 0
91	 && segment < dwfl->lookup_elts - 1)
92      ++segment;
93
94  if (dwfl->lookup_segndx[segment] >= 0)
95    ndx = dwfl->lookup_segndx[segment];
96
97  return ndx;
98}
99
100int
101dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name,
102			    Dwfl_Memory_Callback *memory_callback,
103			    void *memory_callback_arg,
104			    Dwfl_Module_Callback *read_eagerly,
105			    void *read_eagerly_arg)
106{
107  size_t segment = ndx;
108
109  if (segment >= dwfl->lookup_elts)
110    segment = dwfl->lookup_elts - 1;
111
112  while (segment > 0 && dwfl->lookup_segndx[segment] > ndx)
113    --segment;
114
115  while (dwfl->lookup_segndx[segment] < ndx)
116    if (++segment == dwfl->lookup_elts)
117      return 0;
118
119  GElf_Addr start = dwfl->lookup_addr[segment];
120
121  inline bool segment_read (int segndx,
122			    void **buffer, size_t *buffer_available,
123			    GElf_Addr addr, size_t minread)
124  {
125    return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available,
126				 addr, minread, memory_callback_arg);
127  }
128
129  inline void release_buffer (void **buffer, size_t *buffer_available)
130  {
131    if (*buffer != NULL)
132      (void) segment_read (-1, buffer, buffer_available, 0, 0);
133  }
134
135  /* First read in the file header and check its sanity.  */
136
137  void *buffer = NULL;
138  size_t buffer_available = INITIAL_READ;
139
140  inline int finish (void)
141  {
142    release_buffer (&buffer, &buffer_available);
143    return ndx;
144  }
145
146  if (segment_read (ndx, &buffer, &buffer_available,
147		    start, sizeof (Elf64_Ehdr))
148      || memcmp (buffer, ELFMAG, SELFMAG) != 0)
149    return finish ();
150
151  inline bool read_portion (void **data, size_t *data_size,
152			    GElf_Addr vaddr, size_t filesz)
153  {
154    if (vaddr - start + filesz > buffer_available)
155      {
156	*data = NULL;
157	*data_size = filesz;
158	return segment_read (addr_segndx (dwfl, segment, vaddr),
159			     data, data_size, vaddr, filesz);
160      }
161
162    /* We already have this whole note segment from our initial read.  */
163    *data = vaddr - start + buffer;
164    *data_size = 0;
165    return false;
166  }
167
168  inline void finish_portion (void **data, size_t *data_size)
169  {
170    if (*data_size != 0)
171      release_buffer (data, data_size);
172  }
173
174  /* Extract the information we need from the file header.  */
175  union
176  {
177    Elf32_Ehdr e32;
178    Elf64_Ehdr e64;
179  } ehdr;
180  GElf_Off phoff;
181  uint_fast16_t phnum;
182  uint_fast16_t phentsize;
183  GElf_Off shdrs_end;
184  Elf_Data xlatefrom =
185    {
186      .d_type = ELF_T_EHDR,
187      .d_buf = (void *) buffer,
188      .d_version = EV_CURRENT,
189    };
190  Elf_Data xlateto =
191    {
192      .d_type = ELF_T_EHDR,
193      .d_buf = &ehdr,
194      .d_size = sizeof ehdr,
195      .d_version = EV_CURRENT,
196    };
197  switch (((const unsigned char *) buffer)[EI_CLASS])
198    {
199    case ELFCLASS32:
200      xlatefrom.d_size = sizeof (Elf32_Ehdr);
201      if (elf32_xlatetom (&xlateto, &xlatefrom,
202			  ((const unsigned char *) buffer)[EI_DATA]) == NULL)
203	return finish ();
204      phoff = ehdr.e32.e_phoff;
205      phnum = ehdr.e32.e_phnum;
206      phentsize = ehdr.e32.e_phentsize;
207      if (phentsize != sizeof (Elf32_Phdr))
208	return finish ();
209      shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize;
210      break;
211
212    case ELFCLASS64:
213      xlatefrom.d_size = sizeof (Elf64_Ehdr);
214      if (elf64_xlatetom (&xlateto, &xlatefrom,
215			  ((const unsigned char *) buffer)[EI_DATA]) == NULL)
216	return finish ();
217      phoff = ehdr.e64.e_phoff;
218      phnum = ehdr.e64.e_phnum;
219      phentsize = ehdr.e64.e_phentsize;
220      if (phentsize != sizeof (Elf64_Phdr))
221	return finish ();
222      shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize;
223      break;
224
225    default:
226      return finish ();
227    }
228
229  /* The file header tells where to find the program headers.
230     These are what we need to find the boundaries of the module.
231     Without them, we don't have a module to report.  */
232
233  if (phnum == 0)
234    return finish ();
235
236  xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR;
237  xlatefrom.d_size = phnum * phentsize;
238
239  void *ph_buffer = NULL;
240  size_t ph_buffer_size = 0;
241  if (read_portion (&ph_buffer, &ph_buffer_size,
242		    start + phoff, xlatefrom.d_size))
243    return finish ();
244
245  xlatefrom.d_buf = ph_buffer;
246
247  union
248  {
249    Elf32_Phdr p32[phnum];
250    Elf64_Phdr p64[phnum];
251  } phdrs;
252
253  xlateto.d_buf = &phdrs;
254  xlateto.d_size = sizeof phdrs;
255
256  /* Track the bounds of the file visible in memory.  */
257  GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end.  */
258  GElf_Off file_end = 0;	 /* Rounded up to effective page size.  */
259  GElf_Off contiguous = 0;	 /* Visible as contiguous file from START.  */
260  GElf_Off total_filesz = 0;	 /* Total size of data to read.  */
261
262  /* Collect the bias between START and the containing PT_LOAD's p_vaddr.  */
263  GElf_Addr bias = 0;
264  bool found_bias = false;
265
266  /* Collect the unbiased bounds of the module here.  */
267  GElf_Addr module_start = -1l;
268  GElf_Addr module_end = 0;
269
270  /* If we see PT_DYNAMIC, record it here.  */
271  GElf_Addr dyn_vaddr = 0;
272  GElf_Xword dyn_filesz = 0;
273
274  /* Collect the build ID bits here.  */
275  void *build_id = NULL;
276  size_t build_id_len = 0;
277  GElf_Addr build_id_vaddr = 0;
278
279  /* Consider a PT_NOTE we've found in the image.  */
280  inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz)
281  {
282    /* If we have already seen a build ID, we don't care any more.  */
283    if (build_id != NULL || filesz == 0)
284      return;
285
286    void *data;
287    size_t data_size;
288    if (read_portion (&data, &data_size, vaddr, filesz))
289      return;
290
291    assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr));
292
293    void *notes;
294    if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA)
295      notes = data;
296    else
297      {
298	notes = malloc (filesz);
299	if (unlikely (notes == NULL))
300	  return;
301	xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR;
302	xlatefrom.d_buf = (void *) data;
303	xlatefrom.d_size = filesz;
304	xlateto.d_buf = notes;
305	xlateto.d_size = filesz;
306	if (elf32_xlatetom (&xlateto, &xlatefrom,
307			    ehdr.e32.e_ident[EI_DATA]) == NULL)
308	  goto done;
309      }
310
311    const GElf_Nhdr *nh = notes;
312    while ((const void *) nh < (const void *) notes + filesz)
313     {
314	const void *note_name = nh + 1;
315	const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz);
316	if (unlikely ((size_t) ((const void *) notes + filesz
317				- note_desc) < nh->n_descsz))
318	  break;
319
320	if (nh->n_type == NT_GNU_BUILD_ID
321	    && nh->n_descsz > 0
322	    && nh->n_namesz == sizeof "GNU"
323	    && !memcmp (note_name, "GNU", sizeof "GNU"))
324	  {
325	    build_id_vaddr = note_desc - (const void *) notes + vaddr;
326	    build_id_len = nh->n_descsz;
327	    build_id = malloc (nh->n_descsz);
328	    if (likely (build_id != NULL))
329	      memcpy (build_id, note_desc, build_id_len);
330	    break;
331	  }
332
333	nh = note_desc + NOTE_ALIGN (nh->n_descsz);
334      }
335
336  done:
337    if (notes != data)
338      free (notes);
339    finish_portion (&data, &data_size);
340  }
341
342  /* Consider each of the program headers we've read from the image.  */
343  inline void consider_phdr (GElf_Word type,
344			     GElf_Addr vaddr, GElf_Xword memsz,
345			     GElf_Off offset, GElf_Xword filesz,
346			     GElf_Xword align)
347  {
348    switch (type)
349      {
350      case PT_DYNAMIC:
351	dyn_vaddr = vaddr;
352	dyn_filesz = filesz;
353	break;
354
355      case PT_NOTE:
356	/* We calculate from the p_offset of the note segment,
357	   because we don't yet know the bias for its p_vaddr.  */
358	consider_notes (start + offset, filesz);
359	break;
360
361      case PT_LOAD:
362	align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1;
363
364	GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align;
365	GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end;
366	GElf_Off filesz_offset = filesz_vaddr - vaddr + offset;
367
368	if (file_trimmed_end < offset + filesz)
369	  {
370	    file_trimmed_end = offset + filesz;
371
372	    /* Trim the last segment so we don't bother with zeros
373	       in the last page that are off the end of the file.
374	       However, if the extra bit in that page includes the
375	       section headers, keep them.  */
376	    if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end)
377	      {
378		filesz += shdrs_end - file_trimmed_end;
379		file_trimmed_end = shdrs_end;
380	      }
381	  }
382
383	total_filesz += filesz;
384
385	if (file_end < filesz_offset)
386	  {
387	    file_end = filesz_offset;
388	    if (filesz_vaddr - start == filesz_offset)
389	      contiguous = file_end;
390	  }
391
392	if (!found_bias && (offset & -align) == 0
393	    && likely (filesz_offset >= phoff + phnum * phentsize))
394	  {
395	    bias = start - vaddr;
396	    found_bias = true;
397	  }
398
399	vaddr &= -align;
400	if (vaddr < module_start)
401	  module_start = vaddr;
402
403	if (module_end < vaddr_end)
404	  module_end = vaddr_end;
405	break;
406      }
407  }
408  if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
409    {
410      if (elf32_xlatetom (&xlateto, &xlatefrom,
411			  ehdr.e32.e_ident[EI_DATA]) == NULL)
412	found_bias = false;	/* Trigger error check.  */
413      else
414	for (uint_fast16_t i = 0; i < phnum; ++i)
415	  consider_phdr (phdrs.p32[i].p_type,
416			 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz,
417			 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz,
418			 phdrs.p32[i].p_align);
419    }
420  else
421    {
422      if (elf64_xlatetom (&xlateto, &xlatefrom,
423			  ehdr.e32.e_ident[EI_DATA]) == NULL)
424	found_bias = false;	/* Trigger error check.  */
425      else
426	for (uint_fast16_t i = 0; i < phnum; ++i)
427	  consider_phdr (phdrs.p64[i].p_type,
428			 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz,
429			 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz,
430			 phdrs.p64[i].p_align);
431    }
432
433  finish_portion (&ph_buffer, &ph_buffer_size);
434
435  /* We must have seen the segment covering offset 0, or else the ELF
436     header we read at START was not produced by these program headers.  */
437  if (unlikely (!found_bias))
438    return finish ();
439
440  /* Now we know enough to report a module for sure: its bounds.  */
441  module_start += bias;
442  module_end += bias;
443
444  dyn_vaddr += bias;
445
446  /* Our return value now says to skip the segments contained
447     within the module.
448     XXX handle gaps
449  */
450  ndx = addr_segndx (dwfl, segment, module_end);
451
452  /* Examine its .dynamic section to get more interesting details.
453     If it has DT_SONAME, we'll use that as the module name.
454     We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME,
455     and they also tell us the essential portion of the file
456     for fetching symbols.  */
457  GElf_Addr soname_stroff = 0;
458  GElf_Addr dynstr_vaddr = 0;
459  GElf_Xword dynstrsz = 0;
460  inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val)
461  {
462    switch (tag)
463      {
464      default:
465	return false;
466
467      case DT_SONAME:
468	soname_stroff = val;
469	break;
470
471      case DT_STRTAB:
472	dynstr_vaddr = val;
473	break;
474
475      case DT_STRSZ:
476	dynstrsz = val;
477	break;
478      }
479
480    return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0;
481  }
482
483  const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32
484			      ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn));
485  void *dyn_data = NULL;
486  size_t dyn_data_size = 0;
487  if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0
488      && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz))
489    {
490      union
491      {
492	Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)];
493	Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)];
494      } dyn;
495
496      xlatefrom.d_type = xlateto.d_type = ELF_T_DYN;
497      xlatefrom.d_buf = (void *) dyn_data;
498      xlatefrom.d_size = dyn_filesz;
499      xlateto.d_buf = &dyn;
500      xlateto.d_size = sizeof dyn;
501
502      if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
503	{
504	  if (elf32_xlatetom (&xlateto, &xlatefrom,
505			      ehdr.e32.e_ident[EI_DATA]) != NULL)
506	    for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i)
507	      if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val))
508		break;
509	}
510      else
511	{
512	  if (elf64_xlatetom (&xlateto, &xlatefrom,
513			      ehdr.e32.e_ident[EI_DATA]) != NULL)
514	    for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i)
515	      if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val))
516		break;
517	}
518    }
519  finish_portion (&dyn_data, &dyn_data_size);
520
521  /* We'll use the name passed in or a stupid default if not DT_SONAME.  */
522  if (name == NULL)
523    name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : "[dso]";
524
525  void *soname = NULL;
526  size_t soname_size = 0;
527  if (dynstrsz != 0 && dynstr_vaddr != 0)
528    {
529      /* We know the bounds of the .dynstr section.  */
530      dynstr_vaddr += bias;
531      if (unlikely (dynstr_vaddr + dynstrsz > module_end))
532	dynstrsz = 0;
533
534      /* Try to get the DT_SONAME string.  */
535      if (soname_stroff != 0 && soname_stroff < dynstrsz - 1
536	  && ! read_portion (&soname, &soname_size,
537			     dynstr_vaddr + soname_stroff, 0))
538	name = soname;
539    }
540
541  /* Now that we have chosen the module's name and bounds, report it.
542     If we found a build ID, report that too.  */
543
544  Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name,
545						 module_start, module_end);
546  if (likely (mod != NULL) && build_id != NULL
547      && unlikely (INTUSE(dwfl_module_report_build_id) (mod,
548							build_id,
549							build_id_len,
550							build_id_vaddr)))
551    {
552      mod->gc = true;
553      mod = NULL;
554    }
555
556  /* At this point we do not need BUILD_ID or NAME any more.
557     They have been copied.  */
558  free (build_id);
559  finish_portion (&soname, &soname_size);
560
561  if (unlikely (mod == NULL))
562    {
563      ndx = -1;
564      return finish ();
565    }
566
567  /* We have reported the module.  Now let the caller decide whether we
568     should read the whole thing in right now.  */
569
570  const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz
571			 : buffer_available >= contiguous ? 0
572			 : contiguous - buffer_available);
573  const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0
574			       : dynstr_vaddr + dynstrsz - start);
575  const GElf_Off whole = MAX (file_trimmed_end, shdrs_end);
576
577  Elf *elf = NULL;
578  if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available,
579		       cost, worthwhile, whole, contiguous,
580		       read_eagerly_arg, &elf)
581      && elf == NULL)
582    {
583      /* The caller wants to read the whole file in right now, but hasn't
584	 done it for us.  Fill in a local image of the virtual file.  */
585
586      void *contents = calloc (1, file_trimmed_end);
587      if (unlikely (contents == NULL))
588	return finish ();
589
590      inline void final_read (size_t offset, GElf_Addr vaddr, size_t size)
591      {
592	void *into = contents + offset;
593	size_t read_size = size;
594	(void) segment_read (addr_segndx (dwfl, segment, vaddr),
595			     &into, &read_size, vaddr, size);
596      }
597
598      if (contiguous < file_trimmed_end)
599	{
600	  /* We can't use the memory image verbatim as the file image.
601	     So we'll be reading into a local image of the virtual file.  */
602
603	  inline void read_phdr (GElf_Word type, GElf_Addr vaddr,
604				 GElf_Off offset, GElf_Xword filesz)
605	  {
606	    if (type == PT_LOAD)
607	      final_read (offset, vaddr + bias, filesz);
608	  }
609
610	  if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
611	    for (uint_fast16_t i = 0; i < phnum; ++i)
612	      read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr,
613			 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz);
614	  else
615	    for (uint_fast16_t i = 0; i < phnum; ++i)
616	      read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr,
617			 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz);
618	}
619      else
620	{
621	  /* The whole file sits contiguous in memory,
622	     but the caller didn't want to just do it.  */
623
624	  const size_t have = MIN (buffer_available, file_trimmed_end);
625	  memcpy (contents, buffer, have);
626
627	  if (have < file_trimmed_end)
628	    final_read (have, start + have, file_trimmed_end - have);
629	}
630
631      elf = elf_memory (contents, file_trimmed_end);
632      if (unlikely (elf == NULL))
633	free (contents);
634      else
635	elf->flags |= ELF_F_MALLOCED;
636    }
637
638  if (elf != NULL)
639    {
640      /* Install the file in the module.  */
641      mod->main.elf = elf;
642      mod->main.bias = bias;
643    }
644
645  return finish ();
646}
647