1116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch/* Find debugging and symbol information for a module in libdwfl.
2116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   Copyright (C) 2005-2011 Red Hat, Inc.
3116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   This file is part of Red Hat elfutils.
4116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
5116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   Red Hat elfutils is free software; you can redistribute it and/or modify
6116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   it under the terms of the GNU General Public License as published by the
7116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   Free Software Foundation; version 2 of the License.
8116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
9116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   Red Hat elfutils is distributed in the hope that it will be useful, but
10116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   WITHOUT ANY WARRANTY; without even the implied warranty of
11116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   General Public License for more details.
13116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
14116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   You should have received a copy of the GNU General Public License along
151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   with Red Hat elfutils; if not, write to the Free Software Foundation,
16116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
17116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
18116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch   In addition, as a special exception, Red Hat, Inc. gives You the
19   additional right to link the code of Red Hat elfutils with code licensed
20   under any Open Source Initiative certified open source license
21   (http://www.opensource.org/licenses/index.php) which requires the
22   distribution of source code with any binary distribution and to
23   distribute linked combinations of the two.  Non-GPL Code permitted under
24   this exception must only link to the code of Red Hat elfutils through
25   those well defined interfaces identified in the file named EXCEPTION
26   found in the source code files (the "Approved Interfaces").  The files
27   of Non-GPL Code may instantiate templates or use macros or inline
28   functions from the Approved Interfaces without causing the resulting
29   work to be covered by the GNU General Public License.  Only Red Hat,
30   Inc. may make changes or additions to the list of Approved Interfaces.
31   Red Hat's grant of this exception is conditioned upon your not adding
32   any new exceptions.  If you wish to add a new Approved Interface or
33   exception, please contact Red Hat.  You must obey the GNU General Public
34   License in all respects for all of the Red Hat elfutils code and other
35   code used in conjunction with Red Hat elfutils except the Non-GPL Code
36   covered by this exception.  If you modify this file, you may extend this
37   exception to your version of the file, but you are not obligated to do
38   so.  If you do not wish to provide this exception without modification,
39   you must delete this exception statement from your version and license
40   this file solely under the GPL without exception.
41
42   Red Hat elfutils is an included package of the Open Invention Network.
43   An included package of the Open Invention Network is a package for which
44   Open Invention Network licensees cross-license their patents.  No patent
45   license is granted, either expressly or impliedly, by designation as an
46   included package.  Should you wish to participate in the Open Invention
47   Network licensing program, please visit www.openinventionnetwork.com
48   <http://www.openinventionnetwork.com>.  */
49
50#include "libdwflP.h"
51#include <fcntl.h>
52#include <string.h>
53#include <unistd.h>
54#include "../libdw/libdwP.h"	/* DWARF_E_* values are here.  */
55
56
57/* Open libelf FILE->fd and compute the load base of ELF as loaded in MOD.
58   When we return success, FILE->elf and FILE->vaddr are set up.  */
59static inline Dwfl_Error
60open_elf (Dwfl_Module *mod, struct dwfl_file *file)
61{
62  if (file->elf == NULL)
63    {
64      /* CBFAIL uses errno if it's set, so clear it first in case we don't
65	 set it with an open failure below.  */
66      errno = 0;
67
68      /* If there was a pre-primed file name left that the callback left
69	 behind, try to open that file name.  */
70      if (file->fd < 0 && file->name != NULL)
71	file->fd = TEMP_FAILURE_RETRY (open64 (file->name, O_RDONLY));
72
73      if (file->fd < 0)
74	return CBFAIL;
75
76      Dwfl_Error error = __libdw_open_file (&file->fd, &file->elf, true, false);
77      if (error != DWFL_E_NOERROR)
78	return error;
79    }
80  else if (unlikely (elf_kind (file->elf) != ELF_K_ELF))
81    {
82      elf_end (file->elf);
83      file->elf = NULL;
84      close (file->fd);
85      file->fd = -1;
86      return DWFL_E_BADELF;
87    }
88
89  GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr (file->elf, &ehdr_mem);
90  if (ehdr == NULL)
91    {
92    elf_error:
93      elf_end (file->elf);
94      file->elf = NULL;
95      close (file->fd);
96      file->fd = -1;
97      return DWFL_E (LIBELF, elf_errno ());
98    }
99
100  if (mod->e_type != ET_REL)
101    {
102      /* In any non-ET_REL file, we compute the "synchronization address".
103
104	 We start with the address at the end of the first PT_LOAD
105	 segment.  When prelink converts REL to RELA in an ET_DYN
106	 file, it expands the space between the beginning of the
107	 segment and the actual code/data addresses.  Since that
108	 change wasn't made in the debug file, the distance from
109	 p_vaddr to an address of interest (in an st_value or DWARF
110	 data) now differs between the main and debug files.  The
111	 distance from address_sync to an address of interest remains
112	 consistent.
113
114	 If there are no section headers at all (full stripping), then
115	 the end of the first segment is a valid synchronization address.
116	 This cannot happen in a prelinked file, since prelink itself
117	 relies on section headers for prelinking and for undoing it.
118	 (If you do full stripping on a prelinked file, then you get what
119	 you deserve--you can neither undo the prelinking, nor expect to
120	 line it up with a debug file separated before prelinking.)
121
122	 However, when prelink processes an ET_EXEC file, it can do
123	 something different.  There it juggles the "special" sections
124	 (SHT_DYNSYM et al) to make space for the additional prelink
125	 special sections.  Sometimes it will do this by moving a special
126	 section like .dynstr after the real program sections in the first
127	 PT_LOAD segment--i.e. to the end.  That changes the end address of
128	 the segment, so it no longer lines up correctly and is not a valid
129	 synchronization address to use.  Because of this, we need to apply
130	 a different prelink-savvy means to discover the synchronization
131	 address when there is a separate debug file and a prelinked main
132	 file.  That is done in find_debuginfo, below.  */
133
134      size_t phnum;
135      if (unlikely (elf_getphdrnum (file->elf, &phnum) != 0))
136	goto elf_error;
137
138      file->vaddr = file->address_sync = 0;
139      for (size_t i = 0; i < phnum; ++i)
140	{
141	  GElf_Phdr ph_mem;
142	  GElf_Phdr *ph = gelf_getphdr (file->elf, i, &ph_mem);
143	  if (unlikely (ph == NULL))
144	    goto elf_error;
145	  if (ph->p_type == PT_LOAD)
146	    {
147	      file->vaddr = ph->p_vaddr & -ph->p_align;
148	      file->address_sync = ph->p_vaddr + ph->p_memsz;
149	      break;
150	    }
151	}
152    }
153
154  mod->e_type = ehdr->e_type;
155
156  /* Relocatable Linux kernels are ET_EXEC but act like ET_DYN.  */
157  if (mod->e_type == ET_EXEC && file->vaddr != mod->low_addr)
158    mod->e_type = ET_DYN;
159
160  return DWFL_E_NOERROR;
161}
162
163/* Find the main ELF file for this module and open libelf on it.
164   When we return success, MOD->main.elf and MOD->main.bias are set up.  */
165void
166internal_function
167__libdwfl_getelf (Dwfl_Module *mod)
168{
169  if (mod->main.elf != NULL	/* Already done.  */
170      || mod->elferr != DWFL_E_NOERROR)	/* Cached failure.  */
171    return;
172
173  mod->main.fd = (*mod->dwfl->callbacks->find_elf) (MODCB_ARGS (mod),
174						    &mod->main.name,
175						    &mod->main.elf);
176  const bool fallback = mod->main.elf == NULL && mod->main.fd < 0;
177  mod->elferr = open_elf (mod, &mod->main);
178  if (mod->elferr != DWFL_E_NOERROR)
179    return;
180
181  if (!mod->main.valid)
182    {
183      /* Clear any explicitly reported build ID, just in case it was wrong.
184	 We'll fetch it from the file when asked.  */
185      free (mod->build_id_bits);
186      mod->build_id_bits = NULL;
187      mod->build_id_len = 0;
188    }
189  else if (fallback)
190    {
191      /* We have an authoritative build ID for this module, so
192	 don't use a file by name that doesn't match that ID.  */
193
194      assert (mod->build_id_len > 0);
195
196      switch (__builtin_expect (__libdwfl_find_build_id (mod, false,
197							 mod->main.elf), 2))
198	{
199	case 2:
200	  /* Build ID matches as it should. */
201	  return;
202
203	case -1:			/* ELF error.  */
204	  mod->elferr = INTUSE(dwfl_errno) ();
205	  break;
206
207	case 0:			/* File has no build ID note.  */
208	case 1:			/* FIle has a build ID that does not match.  */
209	  mod->elferr = DWFL_E_WRONG_ID_ELF;
210	  break;
211
212	default:
213	  abort ();
214	}
215
216      /* We get here when it was the right ELF file.  Clear it out.  */
217      elf_end (mod->main.elf);
218      mod->main.elf = NULL;
219      if (mod->main.fd >= 0)
220	{
221	  close (mod->main.fd);
222	  mod->main.fd = -1;
223	}
224    }
225
226  mod->main_bias = mod->e_type == ET_REL ? 0 : mod->low_addr - mod->main.vaddr;
227}
228
229/* Search an ELF file for a ".gnu_debuglink" section.  */
230static const char *
231find_debuglink (Elf *elf, GElf_Word *crc)
232{
233  size_t shstrndx;
234  if (elf_getshdrstrndx (elf, &shstrndx) < 0)
235    return NULL;
236
237  Elf_Scn *scn = NULL;
238  while ((scn = elf_nextscn (elf, scn)) != NULL)
239    {
240      GElf_Shdr shdr_mem;
241      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
242      if (shdr == NULL)
243	return NULL;
244
245      const char *name = elf_strptr (elf, shstrndx, shdr->sh_name);
246      if (name == NULL)
247	return NULL;
248
249      if (!strcmp (name, ".gnu_debuglink"))
250	break;
251    }
252
253  if (scn == NULL)
254    return NULL;
255
256  /* Found the .gnu_debuglink section.  Extract its contents.  */
257  Elf_Data *rawdata = elf_rawdata (scn, NULL);
258  if (rawdata == NULL)
259    return NULL;
260
261  Elf_Data crcdata =
262    {
263      .d_type = ELF_T_WORD,
264      .d_buf = crc,
265      .d_size = sizeof *crc,
266      .d_version = EV_CURRENT,
267    };
268  Elf_Data conv =
269    {
270      .d_type = ELF_T_WORD,
271      .d_buf = rawdata->d_buf + rawdata->d_size - sizeof *crc,
272      .d_size = sizeof *crc,
273      .d_version = EV_CURRENT,
274    };
275
276  GElf_Ehdr ehdr_mem;
277  GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_mem);
278  if (ehdr == NULL)
279    return NULL;
280
281  Elf_Data *d = gelf_xlatetom (elf, &crcdata, &conv, ehdr->e_ident[EI_DATA]);
282  if (d == NULL)
283    return NULL;
284  assert (d == &crcdata);
285
286  return rawdata->d_buf;
287}
288
289/* If the main file might have been prelinked, then we need to
290   discover the correct synchronization address between the main and
291   debug files.  Because of prelink's section juggling, we cannot rely
292   on the address_sync computed from PT_LOAD segments (see open_elf).
293
294   We will attempt to discover a synchronization address based on the
295   section headers instead.  But finding a section address that is
296   safe to use requires identifying which sections are SHT_PROGBITS.
297   We can do that in the main file, but in the debug file all the
298   allocated sections have been transformed into SHT_NOBITS so we have
299   lost the means to match them up correctly.
300
301   The only method left to us is to decode the .gnu.prelink_undo
302   section in the prelinked main file.  This shows what the sections
303   looked like before prelink juggled them--when they still had a
304   direct correspondence to the debug file.  */
305static Dwfl_Error
306find_prelink_address_sync (Dwfl_Module *mod)
307{
308  /* The magic section is only identified by name.  */
309  size_t shstrndx;
310  if (elf_getshdrstrndx (mod->main.elf, &shstrndx) < 0)
311    return DWFL_E_LIBELF;
312
313  Elf_Scn *scn = NULL;
314  while ((scn = elf_nextscn (mod->main.elf, scn)) != NULL)
315    {
316      GElf_Shdr shdr_mem;
317      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
318      if (unlikely (shdr == NULL))
319	return DWFL_E_LIBELF;
320      if (shdr->sh_type == SHT_PROGBITS
321	  && !(shdr->sh_flags & SHF_ALLOC)
322	  && shdr->sh_name != 0)
323	{
324	  const char *secname = elf_strptr (mod->main.elf, shstrndx,
325					    shdr->sh_name);
326	  if (unlikely (secname == NULL))
327	    return DWFL_E_LIBELF;
328	  if (!strcmp (secname, ".gnu.prelink_undo"))
329	    break;
330	}
331    }
332
333  if (scn == NULL)
334    /* There was no .gnu.prelink_undo section.  */
335    return DWFL_E_NOERROR;
336
337  Elf_Data *undodata = elf_rawdata (scn, NULL);
338  if (unlikely (undodata == NULL))
339    return DWFL_E_LIBELF;
340
341  /* Decode the section.  It consists of the original ehdr, phdrs,
342     and shdrs (but omits section 0).  */
343
344  union
345  {
346    Elf32_Ehdr e32;
347    Elf64_Ehdr e64;
348  } ehdr;
349  Elf_Data dst =
350    {
351      .d_buf = &ehdr,
352      .d_size = sizeof ehdr,
353      .d_type = ELF_T_EHDR,
354      .d_version = EV_CURRENT
355    };
356  Elf_Data src = *undodata;
357  src.d_size = gelf_fsize (mod->main.elf, ELF_T_EHDR, 1, EV_CURRENT);
358  src.d_type = ELF_T_EHDR;
359  if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
360			       elf_getident (mod->main.elf, NULL)[EI_DATA])
361		== NULL))
362    return DWFL_E_LIBELF;
363
364  size_t shentsize = gelf_fsize (mod->main.elf, ELF_T_SHDR, 1, EV_CURRENT);
365  size_t phentsize = gelf_fsize (mod->main.elf, ELF_T_PHDR, 1, EV_CURRENT);
366
367  uint_fast16_t phnum;
368  uint_fast16_t shnum;
369  if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
370    {
371      if (ehdr.e32.e_shentsize != shentsize
372	  || ehdr.e32.e_phentsize != phentsize)
373	return DWFL_E_BAD_PRELINK;
374      phnum = ehdr.e32.e_phnum;
375      shnum = ehdr.e32.e_shnum;
376    }
377  else
378    {
379      if (ehdr.e64.e_shentsize != shentsize
380	  || ehdr.e64.e_phentsize != phentsize)
381	return DWFL_E_BAD_PRELINK;
382      phnum = ehdr.e64.e_phnum;
383      shnum = ehdr.e64.e_shnum;
384    }
385
386  /* Since prelink does not store the zeroth section header in the undo
387     section, it cannot support SHN_XINDEX encoding.  */
388  if (unlikely (shnum >= SHN_LORESERVE)
389      || unlikely (undodata->d_size != (src.d_size
390					+ phnum * phentsize
391					+ (shnum - 1) * shentsize)))
392    return DWFL_E_BAD_PRELINK;
393
394  /* We look at the allocated SHT_PROGBITS (or SHT_NOBITS) sections.  (Most
395     every file will have some SHT_PROGBITS sections, but it's possible to
396     have one with nothing but .bss, i.e. SHT_NOBITS.)  The special sections
397     that can be moved around have different sh_type values--except for
398     .interp, the section that became the PT_INTERP segment.  So we exclude
399     the SHT_PROGBITS section whose address matches the PT_INTERP p_vaddr.
400     For this reason, we must examine the phdrs first to find PT_INTERP.  */
401
402  GElf_Addr main_interp = 0;
403  {
404    size_t main_phnum;
405    if (unlikely (elf_getphdrnum (mod->main.elf, &main_phnum)))
406      return DWFL_E_LIBELF;
407    for (size_t i = 0; i < main_phnum; ++i)
408      {
409	GElf_Phdr phdr;
410	if (unlikely (gelf_getphdr (mod->main.elf, i, &phdr) == NULL))
411	  return DWFL_E_LIBELF;
412	if (phdr.p_type == PT_INTERP)
413	  {
414	    main_interp = phdr.p_vaddr;
415	    break;
416	  }
417      }
418  }
419
420  src.d_buf += src.d_size;
421  src.d_type = ELF_T_PHDR;
422  src.d_size = phnum * phentsize;
423
424  GElf_Addr undo_interp = 0;
425  {
426    union
427    {
428      Elf32_Phdr p32[phnum];
429      Elf64_Phdr p64[phnum];
430    } phdr;
431    dst.d_buf = &phdr;
432    dst.d_size = sizeof phdr;
433    if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
434				 ehdr.e32.e_ident[EI_DATA]) == NULL))
435      return DWFL_E_LIBELF;
436    if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
437      {
438	for (uint_fast16_t i = 0; i < phnum; ++i)
439	  if (phdr.p32[i].p_type == PT_INTERP)
440	    {
441	      undo_interp = phdr.p32[i].p_vaddr;
442	      break;
443	    }
444      }
445    else
446      {
447	for (uint_fast16_t i = 0; i < phnum; ++i)
448	  if (phdr.p64[i].p_type == PT_INTERP)
449	    {
450	      undo_interp = phdr.p64[i].p_vaddr;
451	      break;
452	    }
453      }
454  }
455
456  if (unlikely ((main_interp == 0) != (undo_interp == 0)))
457    return DWFL_E_BAD_PRELINK;
458
459  src.d_buf += src.d_size;
460  src.d_type = ELF_T_SHDR;
461  src.d_size = gelf_fsize (mod->main.elf, ELF_T_SHDR, shnum - 1, EV_CURRENT);
462
463  union
464  {
465    Elf32_Shdr s32[shnum - 1];
466    Elf64_Shdr s64[shnum - 1];
467  } shdr;
468  dst.d_buf = &shdr;
469  dst.d_size = sizeof shdr;
470  if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
471			       ehdr.e32.e_ident[EI_DATA]) == NULL))
472    return DWFL_E_LIBELF;
473
474  /* Now we can look at the original section headers of the main file
475     before it was prelinked.  First we'll apply our method to the main
476     file sections as they are after prelinking, to calculate the
477     synchronization address of the main file.  Then we'll apply that
478     same method to the saved section headers, to calculate the matching
479     synchronization address of the debug file.
480
481     The method is to consider SHF_ALLOC sections that are either
482     SHT_PROGBITS or SHT_NOBITS, excluding the section whose sh_addr
483     matches the PT_INTERP p_vaddr.  The special sections that can be
484     moved by prelink have other types, except for .interp (which
485     becomes PT_INTERP).  The "real" sections cannot move as such, but
486     .bss can be split into .dynbss and .bss, with the total memory
487     image remaining the same but being spread across the two sections.
488     So we consider the highest section end, which still matches up.  */
489
490  GElf_Addr highest;
491
492  inline void consider_shdr (GElf_Addr interp,
493			     GElf_Word sh_type,
494			     GElf_Xword sh_flags,
495			     GElf_Addr sh_addr,
496			     GElf_Xword sh_size)
497  {
498    if ((sh_flags & SHF_ALLOC)
499	&& ((sh_type == SHT_PROGBITS && sh_addr != interp)
500	    || sh_type == SHT_NOBITS))
501      {
502	const GElf_Addr sh_end = sh_addr + sh_size;
503	if (sh_end > highest)
504	  highest = sh_end;
505      }
506  }
507
508  highest = 0;
509  scn = NULL;
510  while ((scn = elf_nextscn (mod->main.elf, scn)) != NULL)
511    {
512      GElf_Shdr sh_mem;
513      GElf_Shdr *sh = gelf_getshdr (scn, &sh_mem);
514      if (unlikely (sh == NULL))
515	return DWFL_E_LIBELF;
516      consider_shdr (main_interp, sh->sh_type, sh->sh_flags,
517		     sh->sh_addr, sh->sh_size);
518    }
519  if (highest > mod->main.vaddr)
520    {
521      mod->main.address_sync = highest;
522
523      highest = 0;
524      if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
525	for (size_t i = 0; i < shnum - 1; ++i)
526	  consider_shdr (undo_interp, shdr.s32[i].sh_type, shdr.s32[i].sh_flags,
527			 shdr.s32[i].sh_addr, shdr.s32[i].sh_size);
528      else
529	for (size_t i = 0; i < shnum - 1; ++i)
530	  consider_shdr (undo_interp, shdr.s64[i].sh_type, shdr.s64[i].sh_flags,
531			 shdr.s64[i].sh_addr, shdr.s64[i].sh_size);
532
533      if (highest > mod->debug.vaddr)
534	mod->debug.address_sync = highest;
535      else
536	return DWFL_E_BAD_PRELINK;
537    }
538
539  return DWFL_E_NOERROR;
540}
541
542/* Find the separate debuginfo file for this module and open libelf on it.
543   When we return success, MOD->debug is set up.  */
544static Dwfl_Error
545find_debuginfo (Dwfl_Module *mod)
546{
547  if (mod->debug.elf != NULL)
548    return DWFL_E_NOERROR;
549
550  GElf_Word debuglink_crc = 0;
551  const char *debuglink_file = find_debuglink (mod->main.elf, &debuglink_crc);
552
553  mod->debug.fd = (*mod->dwfl->callbacks->find_debuginfo) (MODCB_ARGS (mod),
554							   mod->main.name,
555							   debuglink_file,
556							   debuglink_crc,
557							   &mod->debug.name);
558  Dwfl_Error result = open_elf (mod, &mod->debug);
559  if (result == DWFL_E_NOERROR && mod->debug.address_sync != 0)
560    result = find_prelink_address_sync (mod);
561  return result;
562}
563
564
565/* Try to find a symbol table in FILE.
566   Returns DWFL_E_NOERROR if a proper one is found.
567   Returns DWFL_E_NO_SYMTAB if not, but still sets results for SHT_DYNSYM.  */
568static Dwfl_Error
569load_symtab (struct dwfl_file *file, struct dwfl_file **symfile,
570	     Elf_Scn **symscn, Elf_Scn **xndxscn,
571	     size_t *syments, int *first_global, GElf_Word *strshndx)
572{
573  bool symtab = false;
574  Elf_Scn *scn = NULL;
575  while ((scn = elf_nextscn (file->elf, scn)) != NULL)
576    {
577      GElf_Shdr shdr_mem, *shdr = gelf_getshdr (scn, &shdr_mem);
578      if (shdr != NULL)
579	switch (shdr->sh_type)
580	  {
581	  case SHT_SYMTAB:
582	    symtab = true;
583	    *symscn = scn;
584	    *symfile = file;
585	    *strshndx = shdr->sh_link;
586	    *syments = shdr->sh_size / shdr->sh_entsize;
587	    *first_global = shdr->sh_info;
588	    if (*xndxscn != NULL)
589	      return DWFL_E_NOERROR;
590	    break;
591
592	  case SHT_DYNSYM:
593	    if (symtab)
594	      break;
595	    /* Use this if need be, but keep looking for SHT_SYMTAB.  */
596	    *symscn = scn;
597	    *symfile = file;
598	    *strshndx = shdr->sh_link;
599	    *syments = shdr->sh_size / shdr->sh_entsize;
600	    break;
601
602	  case SHT_SYMTAB_SHNDX:
603	    *xndxscn = scn;
604	    if (symtab)
605	      return DWFL_E_NOERROR;
606	    break;
607
608	  default:
609	    break;
610	  }
611    }
612
613  if (symtab)
614    /* We found one, though no SHT_SYMTAB_SHNDX to go with it.  */
615    return DWFL_E_NOERROR;
616
617  /* We found no SHT_SYMTAB, so any SHT_SYMTAB_SHNDX was bogus.
618     We might have found an SHT_DYNSYM and set *SYMSCN et al though.  */
619  *xndxscn = NULL;
620  return DWFL_E_NO_SYMTAB;
621}
622
623
624/* Translate addresses into file offsets.
625   OFFS[*] start out zero and remain zero if unresolved.  */
626static void
627find_offsets (Elf *elf, size_t phnum, size_t n,
628	      GElf_Addr addrs[n], GElf_Off offs[n])
629{
630  size_t unsolved = n;
631  for (size_t i = 0; i < phnum; ++i)
632    {
633      GElf_Phdr phdr_mem;
634      GElf_Phdr *phdr = gelf_getphdr (elf, i, &phdr_mem);
635      if (phdr != NULL && phdr->p_type == PT_LOAD && phdr->p_memsz > 0)
636	for (size_t j = 0; j < n; ++j)
637	  if (offs[j] == 0
638	      && addrs[j] >= phdr->p_vaddr
639	      && addrs[j] - phdr->p_vaddr < phdr->p_filesz)
640	    {
641	      offs[j] = addrs[j] - phdr->p_vaddr + phdr->p_offset;
642	      if (--unsolved == 0)
643		break;
644	    }
645    }
646}
647
648/* Try to find a dynamic symbol table via phdrs.  */
649static void
650find_dynsym (Dwfl_Module *mod)
651{
652  GElf_Ehdr ehdr_mem;
653  GElf_Ehdr *ehdr = gelf_getehdr (mod->main.elf, &ehdr_mem);
654
655  size_t phnum;
656  if (unlikely (elf_getphdrnum (mod->main.elf, &phnum) != 0))
657    return;
658
659  for (size_t i = 0; i < phnum; ++i)
660    {
661      GElf_Phdr phdr_mem;
662      GElf_Phdr *phdr = gelf_getphdr (mod->main.elf, i, &phdr_mem);
663      if (phdr == NULL)
664	break;
665
666      if (phdr->p_type == PT_DYNAMIC)
667	{
668	  /* Examine the dynamic section for the pointers we need.  */
669
670	  Elf_Data *data = elf_getdata_rawchunk (mod->main.elf,
671						 phdr->p_offset, phdr->p_filesz,
672						 ELF_T_DYN);
673	  if (data == NULL)
674	    continue;
675
676	  enum
677	    {
678	      i_symtab,
679	      i_strtab,
680	      i_hash,
681	      i_gnu_hash,
682	      i_max
683	    };
684	  GElf_Addr addrs[i_max] = { 0, };
685	  GElf_Xword strsz = 0;
686	  size_t n = data->d_size / gelf_fsize (mod->main.elf,
687						ELF_T_DYN, 1, EV_CURRENT);
688	  for (size_t j = 0; j < n; ++j)
689	    {
690	      GElf_Dyn dyn_mem;
691	      GElf_Dyn *dyn = gelf_getdyn (data, j, &dyn_mem);
692	      if (dyn != NULL)
693		switch (dyn->d_tag)
694		  {
695		  case DT_SYMTAB:
696		    addrs[i_symtab] = dyn->d_un.d_ptr;
697		    continue;
698
699		  case DT_HASH:
700		    addrs[i_hash] = dyn->d_un.d_ptr;
701		    continue;
702
703		  case DT_GNU_HASH:
704		    addrs[i_gnu_hash] = dyn->d_un.d_ptr;
705		    continue;
706
707		  case DT_STRTAB:
708		    addrs[i_strtab] = dyn->d_un.d_ptr;
709		    continue;
710
711		  case DT_STRSZ:
712		    strsz = dyn->d_un.d_val;
713		    continue;
714
715		  default:
716		    continue;
717
718		  case DT_NULL:
719		    break;
720		  }
721	      break;
722	    }
723
724	  /* Translate pointers into file offsets.  */
725	  GElf_Off offs[i_max] = { 0, };
726	  find_offsets (mod->main.elf, phnum, i_max, addrs, offs);
727
728	  /* Figure out the size of the symbol table.  */
729	  if (offs[i_hash] != 0)
730	    {
731	      /* In the original format, .hash says the size of .dynsym.  */
732
733	      size_t entsz = SH_ENTSIZE_HASH (ehdr);
734	      data = elf_getdata_rawchunk (mod->main.elf,
735					   offs[i_hash] + entsz, entsz,
736					   entsz == 4 ? ELF_T_WORD
737					   : ELF_T_XWORD);
738	      if (data != NULL)
739		mod->syments = (entsz == 4
740				? *(const GElf_Word *) data->d_buf
741				: *(const GElf_Xword *) data->d_buf);
742	    }
743	  if (offs[i_gnu_hash] != 0 && mod->syments == 0)
744	    {
745	      /* In the new format, we can derive it with some work.  */
746
747	      const struct
748	      {
749		Elf32_Word nbuckets;
750		Elf32_Word symndx;
751		Elf32_Word maskwords;
752		Elf32_Word shift2;
753	      } *header;
754
755	      data = elf_getdata_rawchunk (mod->main.elf, offs[i_gnu_hash],
756					   sizeof *header, ELF_T_WORD);
757	      if (data != NULL)
758		{
759		  header = data->d_buf;
760		  Elf32_Word nbuckets = header->nbuckets;
761		  Elf32_Word symndx = header->symndx;
762		  GElf_Off buckets_at = (offs[i_gnu_hash] + sizeof *header
763					 + (gelf_getclass (mod->main.elf)
764					    * sizeof (Elf32_Word)
765					    * header->maskwords));
766
767		  data = elf_getdata_rawchunk (mod->main.elf, buckets_at,
768					       nbuckets * sizeof (Elf32_Word),
769					       ELF_T_WORD);
770		  if (data != NULL && symndx < nbuckets)
771		    {
772		      const Elf32_Word *const buckets = data->d_buf;
773		      Elf32_Word maxndx = symndx;
774		      for (Elf32_Word bucket = 0; bucket < nbuckets; ++bucket)
775			if (buckets[bucket] > maxndx)
776			  maxndx = buckets[bucket];
777
778		      GElf_Off hasharr_at = (buckets_at
779					     + nbuckets * sizeof (Elf32_Word));
780		      hasharr_at += (maxndx - symndx) * sizeof (Elf32_Word);
781		      do
782			{
783			  data = elf_getdata_rawchunk (mod->main.elf,
784						       hasharr_at,
785						       sizeof (Elf32_Word),
786						       ELF_T_WORD);
787			  if (data != NULL
788			      && (*(const Elf32_Word *) data->d_buf & 1u))
789			    {
790			      mod->syments = maxndx + 1;
791			      break;
792			    }
793			  ++maxndx;
794			  hasharr_at += sizeof (Elf32_Word);
795			} while (data != NULL);
796		    }
797		}
798	    }
799	  if (offs[i_strtab] > offs[i_symtab] && mod->syments == 0)
800	    mod->syments = ((offs[i_strtab] - offs[i_symtab])
801			    / gelf_fsize (mod->main.elf,
802					  ELF_T_SYM, 1, EV_CURRENT));
803
804	  if (mod->syments > 0)
805	    {
806	      mod->symdata = elf_getdata_rawchunk (mod->main.elf,
807						   offs[i_symtab],
808						   gelf_fsize (mod->main.elf,
809							       ELF_T_SYM,
810							       mod->syments,
811							       EV_CURRENT),
812						   ELF_T_SYM);
813	      if (mod->symdata != NULL)
814		{
815		  mod->symstrdata = elf_getdata_rawchunk (mod->main.elf,
816							  offs[i_strtab],
817							  strsz,
818							  ELF_T_BYTE);
819		  if (mod->symstrdata == NULL)
820		    mod->symdata = NULL;
821		}
822	      if (mod->symdata == NULL)
823		mod->symerr = DWFL_E (LIBELF, elf_errno ());
824	      else
825		{
826		  mod->symfile = &mod->main;
827		  mod->symerr = DWFL_E_NOERROR;
828		}
829	      return;
830	    }
831	}
832    }
833}
834
835/* Try to find a symbol table in either MOD->main.elf or MOD->debug.elf.  */
836static void
837find_symtab (Dwfl_Module *mod)
838{
839  if (mod->symdata != NULL	/* Already done.  */
840      || mod->symerr != DWFL_E_NOERROR) /* Cached previous failure.  */
841    return;
842
843  __libdwfl_getelf (mod);
844  mod->symerr = mod->elferr;
845  if (mod->symerr != DWFL_E_NOERROR)
846    return;
847
848  mod->first_global = -1; /* Unknown, unless explicitly set by load_symtab.  */
849
850  /* First see if the main ELF file has the debugging information.  */
851  Elf_Scn *symscn = NULL, *xndxscn = NULL;
852  GElf_Word strshndx;
853  mod->symerr = load_symtab (&mod->main, &mod->symfile, &symscn,
854			     &xndxscn, &mod->syments, &mod->first_global,
855			     &strshndx);
856  switch (mod->symerr)
857    {
858    default:
859      return;
860
861    case DWFL_E_NOERROR:
862      break;
863
864    case DWFL_E_NO_SYMTAB:
865      /* Now we have to look for a separate debuginfo file.  */
866      mod->symerr = find_debuginfo (mod);
867      switch (mod->symerr)
868	{
869	default:
870	  return;
871
872	case DWFL_E_NOERROR:
873	  mod->symerr = load_symtab (&mod->debug, &mod->symfile, &symscn,
874				     &xndxscn, &mod->syments,
875				     &mod->first_global, &strshndx);
876	  break;
877
878	case DWFL_E_CB:		/* The find_debuginfo hook failed.  */
879	  mod->symerr = DWFL_E_NO_SYMTAB;
880	  break;
881	}
882
883      switch (mod->symerr)
884	{
885	default:
886	  return;
887
888	case DWFL_E_NOERROR:
889	  break;
890
891	case DWFL_E_NO_SYMTAB:
892	  if (symscn != NULL)
893	    {
894	      /* We still have the dynamic symbol table.  */
895	      mod->symerr = DWFL_E_NOERROR;
896	      break;
897	    }
898
899	  /* Last ditch, look for dynamic symbols without section headers.  */
900	  find_dynsym (mod);
901	  return;
902	}
903      break;
904    }
905
906  /* This does some sanity checks on the string table section.  */
907  if (elf_strptr (mod->symfile->elf, strshndx, 0) == NULL)
908    {
909    elferr:
910      mod->symerr = DWFL_E (LIBELF, elf_errno ());
911      return;
912    }
913
914  /* Cache the data; MOD->syments and MOD->first_global were set above.  */
915
916  mod->symstrdata = elf_getdata (elf_getscn (mod->symfile->elf, strshndx),
917				 NULL);
918  if (mod->symstrdata == NULL)
919    goto elferr;
920
921  if (xndxscn == NULL)
922    mod->symxndxdata = NULL;
923  else
924    {
925      mod->symxndxdata = elf_getdata (xndxscn, NULL);
926      if (mod->symxndxdata == NULL)
927	goto elferr;
928    }
929
930  mod->symdata = elf_getdata (symscn, NULL);
931  if (mod->symdata == NULL)
932    goto elferr;
933}
934
935
936/* Try to open a libebl backend for MOD.  */
937Dwfl_Error
938internal_function
939__libdwfl_module_getebl (Dwfl_Module *mod)
940{
941  if (mod->ebl == NULL)
942    {
943      __libdwfl_getelf (mod);
944      if (mod->elferr != DWFL_E_NOERROR)
945	return mod->elferr;
946
947      mod->ebl = ebl_openbackend (mod->main.elf);
948      if (mod->ebl == NULL)
949	return DWFL_E_LIBEBL;
950    }
951  return DWFL_E_NOERROR;
952}
953
954/* Try to start up libdw on DEBUGFILE.  */
955static Dwfl_Error
956load_dw (Dwfl_Module *mod, struct dwfl_file *debugfile)
957{
958  if (mod->e_type == ET_REL && !debugfile->relocated)
959    {
960      const Dwfl_Callbacks *const cb = mod->dwfl->callbacks;
961
962      /* The debugging sections have to be relocated.  */
963      if (cb->section_address == NULL)
964	return DWFL_E_NOREL;
965
966      Dwfl_Error error = __libdwfl_module_getebl (mod);
967      if (error != DWFL_E_NOERROR)
968	return error;
969
970      find_symtab (mod);
971      Dwfl_Error result = mod->symerr;
972      if (result == DWFL_E_NOERROR)
973	result = __libdwfl_relocate (mod, debugfile->elf, true);
974      if (result != DWFL_E_NOERROR)
975	return result;
976
977      /* Don't keep the file descriptors around.  */
978      if (mod->main.fd != -1 && elf_cntl (mod->main.elf, ELF_C_FDREAD) == 0)
979	{
980	  close (mod->main.fd);
981	  mod->main.fd = -1;
982	}
983      if (debugfile->fd != -1 && elf_cntl (debugfile->elf, ELF_C_FDREAD) == 0)
984	{
985	  close (debugfile->fd);
986	  debugfile->fd = -1;
987	}
988    }
989
990  mod->dw = INTUSE(dwarf_begin_elf) (debugfile->elf, DWARF_C_READ, NULL);
991  if (mod->dw == NULL)
992    {
993      int err = INTUSE(dwarf_errno) ();
994      return err == DWARF_E_NO_DWARF ? DWFL_E_NO_DWARF : DWFL_E (LIBDW, err);
995    }
996
997  /* Until we have iterated through all CU's, we might do lazy lookups.  */
998  mod->lazycu = 1;
999
1000  return DWFL_E_NOERROR;
1001}
1002
1003/* Try to start up libdw on either the main file or the debuginfo file.  */
1004static void
1005find_dw (Dwfl_Module *mod)
1006{
1007  if (mod->dw != NULL		/* Already done.  */
1008      || mod->dwerr != DWFL_E_NOERROR) /* Cached previous failure.  */
1009    return;
1010
1011  __libdwfl_getelf (mod);
1012  mod->dwerr = mod->elferr;
1013  if (mod->dwerr != DWFL_E_NOERROR)
1014    return;
1015
1016  /* First see if the main ELF file has the debugging information.  */
1017  mod->dwerr = load_dw (mod, &mod->main);
1018  switch (mod->dwerr)
1019    {
1020    case DWFL_E_NOERROR:
1021      mod->debug.elf = mod->main.elf;
1022      mod->debug.address_sync = mod->main.address_sync;
1023      return;
1024
1025    case DWFL_E_NO_DWARF:
1026      break;
1027
1028    default:
1029      goto canonicalize;
1030    }
1031
1032  /* Now we have to look for a separate debuginfo file.  */
1033  mod->dwerr = find_debuginfo (mod);
1034  switch (mod->dwerr)
1035    {
1036    case DWFL_E_NOERROR:
1037      mod->dwerr = load_dw (mod, &mod->debug);
1038      break;
1039
1040    case DWFL_E_CB:		/* The find_debuginfo hook failed.  */
1041      mod->dwerr = DWFL_E_NO_DWARF;
1042      return;
1043
1044    default:
1045      break;
1046    }
1047
1048 canonicalize:
1049  mod->dwerr = __libdwfl_canon_error (mod->dwerr);
1050}
1051
1052Dwarf *
1053dwfl_module_getdwarf (Dwfl_Module *mod, Dwarf_Addr *bias)
1054{
1055  if (mod == NULL)
1056    return NULL;
1057
1058  find_dw (mod);
1059  if (mod->dwerr == DWFL_E_NOERROR)
1060    {
1061      /* If dwfl_module_getelf was used previously, then partial apply
1062	 relocation to miscellaneous sections in the debug file too.  */
1063      if (mod->e_type == ET_REL
1064	  && mod->main.relocated && ! mod->debug.relocated)
1065	{
1066	  mod->debug.relocated = true;
1067	  if (mod->debug.elf != mod->main.elf)
1068	    (void) __libdwfl_relocate (mod, mod->debug.elf, false);
1069	}
1070
1071      *bias = dwfl_adjusted_dwarf_addr (mod, 0);
1072      return mod->dw;
1073    }
1074
1075  __libdwfl_seterrno (mod->dwerr);
1076  return NULL;
1077}
1078INTDEF (dwfl_module_getdwarf)
1079
1080int
1081dwfl_module_getsymtab (Dwfl_Module *mod)
1082{
1083  if (mod == NULL)
1084    return -1;
1085
1086  find_symtab (mod);
1087  if (mod->symerr == DWFL_E_NOERROR)
1088    return mod->syments;
1089
1090  __libdwfl_seterrno (mod->symerr);
1091  return -1;
1092}
1093INTDEF (dwfl_module_getsymtab)
1094