linux-kernel-modules.c revision 15edce876ace54b89eebd323d89643e7712e717a
1/* Standard libdwfl callbacks for debugging the running Linux kernel.
2   Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29/* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30   Everything we need here is fine if its declarations just come first.  */
31/* Some makefiles, e.g. HOST_linux-x86.mk, predefine _FILE_OFFSET_BITS.  */
32#undef _FILE_OFFSET_BITS
33#include <fts.h>
34
35#include <config.h>
36
37#include "libdwflP.h"
38#include <inttypes.h>
39#include <errno.h>
40#include <stdio.h>
41#include <stdio_ext.h>
42#include <string.h>
43#include <stdlib.h>
44#include <sys/utsname.h>
45#include <fcntl.h>
46#include <unistd.h>
47
48
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME	"kernel"

/* printf format of the per-release module directory; the argument is
   the kernel release string.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* Files and directories this file reads to learn about the running
   kernel.  The formats taking %s are given a module name.  */
#define KNOTESFILE	"/sys/kernel/notes"
#define	MODNOTESFMT	"/sys/module/%s/notes"
#define KSYMSFILE	"/proc/kallsyms"
#define MODULELIST	"/proc/modules"
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
59
60
61#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
62static const char *vmlinux_suffixes[] =
63  {
64#ifdef USE_ZLIB
65    ".gz",
66#endif
67#ifdef USE_BZLIB
68    ".bz2",
69#endif
70#ifdef USE_LZMA
71    ".xz",
72#endif
73  };
74#endif
75
76/* Try to open the given file as it is or under the debuginfo directory.  */
77static int
78try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
79{
80  if (*fname == NULL)
81    return -1;
82
83  /* Don't bother trying *FNAME itself here if the path will cause it to be
84     tried because we give its own basename as DEBUGLINK_FILE.  */
85  int fd = ((((dwfl->callbacks->debuginfo_path
86	       ? *dwfl->callbacks->debuginfo_path : NULL)
87	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
88	    : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
89
90  if (fd < 0)
91    {
92      Dwfl_Module fakemod = { .dwfl = dwfl };
93      /* First try the file's unadorned basename as DEBUGLINK_FILE,
94	 to look for "vmlinux" files.  */
95      fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
96						 *fname, basename (*fname), 0,
97						 &fakemod.debug.name);
98      if (fd < 0 && try_debug)
99	/* Next, let the call use the default of basename + ".debug",
100	   to look for "vmlinux.debug" files.  */
101	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
102						   *fname, NULL, 0,
103						   &fakemod.debug.name);
104      if (fakemod.debug.name != NULL)
105	{
106	  free (*fname);
107	  *fname = fakemod.debug.name;
108	}
109    }
110
111#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
112  if (fd < 0)
113    for (size_t i = 0;
114	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
115	 ++i)
116      {
117	char *zname;
118	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
119	  {
120	    fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
121	    if (fd < 0)
122	      free (zname);
123	    else
124	      {
125		free (*fname);
126		*fname = zname;
127	      }
128	  }
129      }
130#endif
131
132  if (fd < 0)
133    {
134      free (*fname);
135      *fname = NULL;
136    }
137
138  return fd;
139}
140
/* Return the release string reported by uname, or NULL if uname fails.
   The string lives in static storage and is reused by later calls.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached;

  /* A non-empty release means an earlier call already succeeded.  */
  if (cached.release[0] != '\0')
    return cached.release;

  if (uname (&cached) != 0)
    return NULL;

  return cached.release;
}
150
151static int
152find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
153{
154  if ((release[0] == '/'
155       ? asprintf (fname, "%s/vmlinux", release)
156       : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
157    return -1;
158
159  int fd = try_kernel_name (dwfl, fname, true);
160  if (fd < 0 && release[0] != '/')
161    {
162      free (*fname);
163      if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
164	return -1;
165      fd = try_kernel_name (dwfl, fname, true);
166    }
167
168  return fd;
169}
170
171static int
172get_release (Dwfl *dwfl, const char **release)
173{
174  if (dwfl == NULL)
175    return -1;
176
177  const char *release_string = release == NULL ? NULL : *release;
178  if (release_string == NULL)
179    {
180      release_string = kernel_release ();
181      if (release_string == NULL)
182	return errno;
183      if (release != NULL)
184	*release = release_string;
185    }
186
187  return 0;
188}
189
190static int
191report_kernel (Dwfl *dwfl, const char **release,
192	       int (*predicate) (const char *module, const char *file))
193{
194  int result = get_release (dwfl, release);
195  if (unlikely (result != 0))
196    return result;
197
198  char *fname;
199  int fd = find_kernel_elf (dwfl, *release, &fname);
200
201  if (fd < 0)
202    result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
203	      ? 0 : errno ?: ENOENT);
204  else
205    {
206      bool report = true;
207
208      if (predicate != NULL)
209	{
210	  /* Let the predicate decide whether to use this one.  */
211	  int want = (*predicate) (KERNEL_MODNAME, fname);
212	  if (want < 0)
213	    result = errno;
214	  report = want > 0;
215	}
216
217      if (report)
218	{
219	  /* Note that on some architectures (e.g. x86_64) the vmlinux
220	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
221	     In both cases the phdr p_vaddr load address will be non-zero.
222	     We want the image to be placed as if it was ET_DYN, so
223	     pass true for add_p_vaddr which will do the right thing
224	     (in combination with a zero base) in either case.  */
225	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
226						      fname, fd, 0, true);
227	  if (mod == NULL)
228	    result = -1;
229	  else
230	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
231	    mod->e_type = ET_DYN;
232	}
233
234      free (fname);
235
236      if (!report || result < 0)
237	close (fd);
238    }
239
240  return result;
241}
242
243/* Look for a kernel debug archive.  If we find one, report all its modules.
244   If not, return ENOENT.  */
245static int
246report_kernel_archive (Dwfl *dwfl, const char **release,
247		       int (*predicate) (const char *module, const char *file))
248{
249  int result = get_release (dwfl, release);
250  if (unlikely (result != 0))
251    return result;
252
253  char *archive;
254  int res = (((*release)[0] == '/')
255	     ? asprintf (&archive, "%s/debug.a", *release)
256	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
257  if (unlikely (res < 0))
258    return ENOMEM;
259
260  int fd = try_kernel_name (dwfl, &archive, false);
261  if (fd < 0)
262    result = errno ?: ENOENT;
263  else
264    {
265      /* We have the archive file open!  */
266      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
267						    true, predicate);
268      if (unlikely (last == NULL))
269	result = -1;
270      else
271	{
272	  /* Find the kernel and move it to the head of the list.  */
273	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
274	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
275	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
276	      {
277		*prevp = m->next;
278		m->next = *tailp;
279		*tailp = m;
280		break;
281	      }
282	}
283    }
284
285  free (archive);
286  return result;
287}
288
/* Check whether the name of F ends in one of the recognized module file
   suffixes.  If NAMELEN is nonzero the name must consist of exactly
   NAMELEN characters followed by the suffix; if it is zero the name
   only has to be longer than the suffix.  Returns the length of the
   matching suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
#define SUFFIX(sfx) { sfx, sizeof sfx - 1 }
  static const struct
  {
    const char *text;
    size_t len;
  } suffixes[] =
    {
      SUFFIX (".ko"),
#if USE_ZLIB
      SUFFIX (".ko.gz"),
#endif
#if USE_BZLIB
      SUFFIX (".ko.bz2"),
#endif
#if USE_LZMA
      SUFFIX (".ko.xz"),
#endif
    };
#undef SUFFIX

  const size_t filelen = f->fts_namelen;

  for (size_t i = 0; i < sizeof suffixes / sizeof suffixes[0]; ++i)
    {
      const size_t sfxlen = suffixes[i].len;

      if (namelen != 0)
	{
	  if (filelen != namelen + sfxlen)
	    continue;
	}
      else if (filelen <= sfxlen)
	continue;

      /* Compare the terminating NUL too.  */
      if (memcmp (f->fts_name + filelen - sfxlen,
		  suffixes[i].text, sfxlen + 1) == 0)
	return sfxlen;
    }

  return 0;
}
314
315/* Report a kernel and all its modules found on disk, for offline use.
316   If RELEASE starts with '/', it names a directory to look in;
317   if not, it names a directory to find under /lib/modules/;
318   if null, /lib/modules/`uname -r` is used.
319   Returns zero on success, -1 if dwfl_report_module failed,
320   or an errno code if finding the files on disk failed.  */
321int
322dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
323				  int (*predicate) (const char *module,
324						    const char *file))
325{
326  int result = report_kernel_archive (dwfl, &release, predicate);
327  if (result != ENOENT)
328    return result;
329
330  /* First report the kernel.  */
331  result = report_kernel (dwfl, &release, predicate);
332  if (result == 0)
333    {
334      /* Do "find /lib/modules/RELEASE -name *.ko".  */
335
336      char *modulesdir[] = { NULL, NULL };
337      if (release[0] == '/')
338	modulesdir[0] = (char *) release;
339      else
340	{
341	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
342	    return errno;
343	}
344
345      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
346      if (modulesdir[0] == (char *) release)
347	modulesdir[0] = NULL;
348      if (fts == NULL)
349	{
350	  free (modulesdir[0]);
351	  return errno;
352	}
353
354      FTSENT *f;
355      while ((f = fts_read (fts)) != NULL)
356	{
357	  /* Skip a "source" subtree, which tends to be large.
358	     This insane hard-coding of names is what depmod does too.  */
359	  if (f->fts_namelen == sizeof "source" - 1
360	      && !strcmp (f->fts_name, "source"))
361	    {
362	      fts_set (fts, f, FTS_SKIP);
363	      continue;
364	    }
365
366	  switch (f->fts_info)
367	    {
368	    case FTS_F:
369	    case FTS_SL:
370	    case FTS_NSOK:;
371	      /* See if this file name matches "*.ko".  */
372	      const size_t suffix = check_suffix (f, 0);
373	      if (suffix)
374		{
375		  /* We have a .ko file to report.  Following the algorithm
376		     by which the kernel makefiles set KBUILD_MODNAME, we
377		     replace all ',' or '-' with '_' in the file name and
378		     call that the module name.  Modules could well be
379		     built using different embedded names than their file
380		     names.  To handle that, we would have to look at the
381		     __this_module.name contents in the module's text.  */
382
383		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
384		  if (unlikely (name == NULL))
385		    {
386		      __libdwfl_seterrno (DWFL_E_NOMEM);
387		      result = -1;
388		      break;
389		    }
390		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
391		    if (name[i] == '-' || name[i] == ',')
392		      name[i] = '_';
393
394		  if (predicate != NULL)
395		    {
396		      /* Let the predicate decide whether to use this one.  */
397		      int want = (*predicate) (name, f->fts_path);
398		      if (want < 0)
399			{
400			  result = -1;
401			  free (name);
402			  break;
403			}
404		      if (!want)
405			{
406			  free (name);
407			  continue;
408			}
409		    }
410
411		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
412		    {
413		      free (name);
414		      result = -1;
415		      break;
416		    }
417		  free (name);
418		}
419	      continue;
420
421	    case FTS_ERR:
422	    case FTS_DNR:
423	    case FTS_NS:
424	      result = f->fts_errno;
425	      break;
426
427	    case FTS_SLNONE:
428	    default:
429	      continue;
430	    }
431
432	  /* We only get here in error cases.  */
433	  break;
434	}
435      fts_close (fts);
436      free (modulesdir[0]);
437    }
438
439  return result;
440}
441INTDEF (dwfl_linux_kernel_report_offline)
442
443
444/* State of read_address used by intuit_kernel_bounds. */
445struct read_address_state {
446  FILE *f;
447  char *line;
448  size_t linesz;
449  size_t n;
450  char *p;
451  const char *type;
452};
453
454static inline bool
455read_address (struct read_address_state *state, Dwarf_Addr *addr)
456{
457  if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
458      state->line[state->n - 2] == ']')
459    return false;
460  *addr = strtoull (state->line, &state->p, 16);
461  state->p += strspn (state->p, " \t");
462  state->type = strsep (&state->p, " \t\n");
463  if (state->type == NULL)
464    return false;
465  return state->p != NULL && state->p != state->line;
466}
467
468
469/* Grovel around to guess the bounds of the runtime kernel image.  */
470static int
471intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
472{
473  struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
474
475  state.f = fopen (KSYMSFILE, "r");
476  if (state.f == NULL)
477    return errno;
478
479  (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
480
481  *notes = 0;
482
483  int result;
484  do
485    result = read_address (&state, start) ? 0 : -1;
486  while (result == 0 && strchr ("TtRr", *state.type) == NULL);
487
488  if (result == 0)
489    {
490      *end = *start;
491      while (read_address (&state, end))
492	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
493	  *notes = *end;
494
495      Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
496      *start &= -(Dwarf_Addr) round_kernel;
497      *end += round_kernel - 1;
498      *end &= -(Dwarf_Addr) round_kernel;
499      if (*start >= *end || *end - *start < round_kernel)
500	result = -1;
501    }
502  free (state.line);
503
504  if (result == -1)
505    result = ferror_unlocked (state.f) ? errno : ENOEXEC;
506
507  fclose (state.f);
508
509  return result;
510}
511
512
513/* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
514static int
515check_notes (Dwfl_Module *mod, const char *notesfile,
516	     Dwarf_Addr vaddr, const char *secname)
517{
518  int fd = open64 (notesfile, O_RDONLY);
519  if (fd < 0)
520    return 1;
521
522  assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
523  assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
524  union
525  {
526    GElf_Nhdr nhdr;
527    unsigned char data[8192];
528  } buf;
529
530  ssize_t n = read (fd, buf.data, sizeof buf);
531  close (fd);
532
533  if (n <= 0)
534    return 1;
535
536  unsigned char *p = buf.data;
537  while (p < &buf.data[n])
538    {
539      /* No translation required since we are reading the native kernel.  */
540      GElf_Nhdr *nhdr = (void *) p;
541      p += sizeof *nhdr;
542      unsigned char *name = p;
543      p += (nhdr->n_namesz + 3) & -4U;
544      unsigned char *bits = p;
545      p += (nhdr->n_descsz + 3) & -4U;
546
547      if (p <= &buf.data[n]
548	  && nhdr->n_type == NT_GNU_BUILD_ID
549	  && nhdr->n_namesz == sizeof "GNU"
550	  && !memcmp (name, "GNU", sizeof "GNU"))
551	{
552	  /* Found it.  For a module we must figure out its VADDR now.  */
553
554	  if (secname != NULL
555	      && (INTUSE(dwfl_linux_kernel_module_section_address)
556		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
557		  || vaddr == (GElf_Addr) -1l))
558	    vaddr = 0;
559
560	  if (vaddr != 0)
561	    vaddr += bits - buf.data;
562	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
563						      nhdr->n_descsz, vaddr);
564	}
565    }
566
567  return 0;
568}
569
570/* Look for a build ID for the kernel.  */
571static int
572check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
573{
574  return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
575}
576
577/* Look for a build ID for a loaded kernel module.  */
578static int
579check_module_notes (Dwfl_Module *mod)
580{
581  char *dirs[2] = { NULL, NULL };
582  if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
583    return ENOMEM;
584
585  FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
586  if (fts == NULL)
587    {
588      free (dirs[0]);
589      return 0;
590    }
591
592  int result = 0;
593  FTSENT *f;
594  while ((f = fts_read (fts)) != NULL)
595    {
596      switch (f->fts_info)
597	{
598	case FTS_F:
599	case FTS_SL:
600	case FTS_NSOK:
601	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
602	  if (result > 0)	/* Nothing found.  */
603	    {
604	      result = 0;
605	      continue;
606	    }
607	  break;
608
609	case FTS_ERR:
610	case FTS_DNR:
611	  result = f->fts_errno;
612	  break;
613
614	case FTS_NS:
615	case FTS_SLNONE:
616	default:
617	  continue;
618	}
619
620      /* We only get here when finished or in error cases.  */
621      break;
622    }
623  fts_close (fts);
624  free (dirs[0]);
625
626  return result;
627}
628
629int
630dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
631{
632  Dwarf_Addr start = 0;
633  Dwarf_Addr end = 0;
634
635  #define report() \
636    (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
637
638  /* This is a bit of a kludge.  If we already reported the kernel,
639     don't bother figuring it out again--it never changes.  */
640  for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
641    if (!strcmp (m->name, KERNEL_MODNAME))
642      {
643	start = m->low_addr;
644	end = m->high_addr;
645	return report () == NULL ? -1 : 0;
646      }
647
648  /* Try to figure out the bounds of the kernel image without
649     looking for any vmlinux file.  */
650  Dwarf_Addr notes;
651  /* The compiler cannot deduce that if intuit_kernel_bounds returns
652     zero NOTES will be initialized.  Fake the initialization.  */
653  asm ("" : "=m" (notes));
654  int result = intuit_kernel_bounds (&start, &end, &notes);
655  if (result == 0)
656    {
657      Dwfl_Module *mod = report ();
658      return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
659    }
660  if (result != ENOENT)
661    return result;
662
663  /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
664  return report_kernel (dwfl, NULL, NULL);
665}
666INTDEF (dwfl_linux_kernel_report_kernel)
667
668
/* Copy the NAMELEN characters of MODULE_NAME, plus the byte following
   them, to ALTERNATE_NAME with every FROM character replaced by TO.
   Returns false, leaving ALTERNATE_NAME untouched, if MODULE_NAME
   contains no FROM at all.  ALTERNATE_NAME must have room for
   NAMELEN + 1 bytes.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  const char *const stop = module_name + namelen;

  const char *hit = memchr (module_name, from, namelen);
  if (hit == NULL)
    return false;

  const char *src = module_name;
  char *dst = alternate_name;
  do
    {
      /* Copy the run before the match, then emit the replacement.  */
      const size_t run = hit - src;
      memcpy (dst, src, run);
      dst += run;
      *dst++ = to;
      src = hit + 1;
      hit = memchr (src, from, stop - src);
    }
  while (hit != NULL);

  /* The tail, including the byte just past the name.  */
  memcpy (dst, src, (stop - src) + 1);
  return true;
}
691
692/* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
693
694int
695dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
696			    void **userdata __attribute__ ((unused)),
697			    const char *module_name,
698			    Dwarf_Addr base __attribute__ ((unused)),
699			    char **file_name, Elf **elfp)
700{
701  if (mod->build_id_len > 0)
702    {
703      int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
704					       file_name, elfp);
705      if (fd >= 0 || mod->main.elf != NULL || errno != 0)
706	return fd;
707    }
708
709  const char *release = kernel_release ();
710  if (release == NULL)
711    return errno;
712
713  if (!strcmp (module_name, KERNEL_MODNAME))
714    return find_kernel_elf (mod->dwfl, release, file_name);
715
716  /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
717
718  char *modulesdir[] = { NULL, NULL };
719  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
720    return -1;
721
722  FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
723  if (fts == NULL)
724    {
725      free (modulesdir[0]);
726      return -1;
727    }
728
729  size_t namelen = strlen (module_name);
730
731  /* This is a kludge.  There is no actual necessary relationship between
732     the name of the .ko file installed and the module name the kernel
733     knows it by when it's loaded.  The kernel's only idea of the module
734     name comes from the name embedded in the object's magic
735     .gnu.linkonce.this_module section.
736
737     In practice, these module names match the .ko file names except for
738     some using '_' and some using '-'.  So our cheap kludge is to look for
739     two files when either a '_' or '-' appears in a module name, one using
740     only '_' and one only using '-'.  */
741
742  char *alternate_name = malloc (namelen + 1);
743  if (unlikely (alternate_name == NULL))
744    {
745      free (modulesdir[0]);
746      return ENOMEM;
747    }
748  if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
749      !subst_name ('_', '-', module_name, alternate_name, namelen))
750    alternate_name[0] = '\0';
751
752  FTSENT *f;
753  int error = ENOENT;
754  while ((f = fts_read (fts)) != NULL)
755    {
756      /* Skip a "source" subtree, which tends to be large.
757	 This insane hard-coding of names is what depmod does too.  */
758      if (f->fts_namelen == sizeof "source" - 1
759	  && !strcmp (f->fts_name, "source"))
760	{
761	  fts_set (fts, f, FTS_SKIP);
762	  continue;
763	}
764
765      error = ENOENT;
766      switch (f->fts_info)
767	{
768	case FTS_F:
769	case FTS_SL:
770	case FTS_NSOK:
771	  /* See if this file name is "MODULE_NAME.ko".  */
772	  if (check_suffix (f, namelen)
773	      && (!memcmp (f->fts_name, module_name, namelen)
774		  || !memcmp (f->fts_name, alternate_name, namelen)))
775	    {
776	      int fd = open64 (f->fts_accpath, O_RDONLY);
777	      *file_name = strdup (f->fts_path);
778	      fts_close (fts);
779	      free (modulesdir[0]);
780	      free (alternate_name);
781	      if (fd < 0)
782		free (*file_name);
783	      else if (*file_name == NULL)
784		{
785		  close (fd);
786		  fd = -1;
787		}
788	      return fd;
789	    }
790	  break;
791
792	case FTS_ERR:
793	case FTS_DNR:
794	case FTS_NS:
795	  error = f->fts_errno;
796	  break;
797
798	case FTS_SLNONE:
799	default:
800	  break;
801	}
802    }
803
804  fts_close (fts);
805  free (modulesdir[0]);
806  free (alternate_name);
807  errno = error;
808  return -1;
809}
810INTDEF (dwfl_linux_kernel_find_elf)
811
812
813/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
814   We read the information from /sys/module directly.  */
815
816int
817dwfl_linux_kernel_module_section_address
818(Dwfl_Module *mod __attribute__ ((unused)),
819 void **userdata __attribute__ ((unused)),
820 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
821 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
822 const GElf_Shdr *shdr __attribute__ ((unused)),
823 Dwarf_Addr *addr)
824{
825  char *sysfile;
826  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
827    return DWARF_CB_ABORT;
828
829  FILE *f = fopen (sysfile, "r");
830  free (sysfile);
831
832  if (f == NULL)
833    {
834      if (errno == ENOENT)
835	{
836	  /* The .modinfo and .data.percpu sections are never kept
837	     loaded in the kernel.  If the kernel was compiled without
838	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
839	     actually loaded at all.
840
841	     Setting *ADDR to -1 tells the caller this section is
842	     actually absent from memory.  */
843
844	  if (!strcmp (secname, ".modinfo")
845	      || !strcmp (secname, ".data.percpu")
846	      || !strncmp (secname, ".exit", 5))
847	    {
848	      *addr = (Dwarf_Addr) -1l;
849	      return DWARF_CB_OK;
850	    }
851
852	  /* The goofy PPC64 module_frob_arch_sections function tweaks
853	     the section names as a way to control other kernel code's
854	     behavior, and this cruft leaks out into the /sys information.
855	     The file name for ".init*" may actually look like "_init*".  */
856
857	  const bool is_init = !strncmp (secname, ".init", 5);
858	  if (is_init)
859	    {
860	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
861			    modname, &secname[1]) < 0)
862		return ENOMEM;
863	      f = fopen (sysfile, "r");
864	      free (sysfile);
865	      if (f != NULL)
866		goto ok;
867	    }
868
869	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
870	     In case that size increases in the future, look for longer
871	     truncated names first.  */
872	  size_t namelen = strlen (secname);
873	  if (namelen >= MODULE_SECT_NAME_LEN)
874	    {
875	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
876				  modname, secname);
877	      if (len < 0)
878		return DWARF_CB_ABORT;
879	      char *end = sysfile + len;
880	      do
881		{
882		  *--end = '\0';
883		  f = fopen (sysfile, "r");
884		  if (is_init && f == NULL && errno == ENOENT)
885		    {
886		      sysfile[len - namelen] = '_';
887		      f = fopen (sysfile, "r");
888		      sysfile[len - namelen] = '.';
889		    }
890		}
891	      while (f == NULL && errno == ENOENT
892		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
893	      free (sysfile);
894
895	      if (f != NULL)
896		goto ok;
897	    }
898	}
899
900      return DWARF_CB_ABORT;
901    }
902
903 ok:
904  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
905
906  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
907		: ferror_unlocked (f) ? errno : ENOEXEC);
908  fclose (f);
909
910  if (result == 0)
911    return DWARF_CB_OK;
912
913  errno = result;
914  return DWARF_CB_ABORT;
915}
916INTDEF (dwfl_linux_kernel_module_section_address)
917
918int
919dwfl_linux_kernel_report_modules (Dwfl *dwfl)
920{
921  FILE *f = fopen (MODULELIST, "r");
922  if (f == NULL)
923    return errno;
924
925  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
926
927  int result = 0;
928  Dwarf_Addr modaddr;
929  unsigned long int modsz;
930  char modname[128];
931  char *line = NULL;
932  size_t linesz = 0;
933  /* We can't just use fscanf here because it's not easy to distinguish \n
934     from other whitespace so as to take the optional word following the
935     address but always stop at the end of the line.  */
936  while (getline (&line, &linesz, f) > 0
937	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
938		    modname, &modsz, &modaddr) == 3)
939    {
940      Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
941						     modaddr, modaddr + modsz);
942      if (mod == NULL)
943	{
944	  result = -1;
945	  break;
946	}
947
948      result = check_module_notes (mod);
949    }
950  free (line);
951
952  if (result == 0)
953    result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
954
955  fclose (f);
956
957  return result;
958}
959INTDEF (dwfl_linux_kernel_report_modules)
960