linux-kernel-modules.c revision a3a76e361527b057fc4cf8a8a8ce97a33dd59198
1/* Standard libdwfl callbacks for debugging the running Linux kernel.
2   Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29/* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30   Everything we need here is fine if its declarations just come first.  */
31
32#include <fts.h>
33
34#include <config.h>
35
36#include "libdwflP.h"
37#include <inttypes.h>
38#include <errno.h>
39#include <stdio.h>
40#include <stdio_ext.h>
41#include <string.h>
42#include <stdlib.h>
43#include <sys/utsname.h>
44#include <fcntl.h>
45#include <unistd.h>
46
47
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME	"kernel"

/* printf format for a module directory; %s is the kernel release.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* Files read to learn about the running kernel and its modules.
   In the formats, %s is a module name.  */
#define KNOTESFILE	"/sys/kernel/notes"
#define	MODNOTESFMT	"/sys/module/%s/notes"
#define KSYMSFILE	"/proc/kallsyms"
#define MODULELIST	"/proc/modules"
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
58
59
#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
/* Suffixes that try_kernel_name appends to a kernel file name when the
   plain name could not be opened.  There is one entry for each
   compression library enabled at build time, so the table only exists
   when at least one of them is.  */
static const char *vmlinux_suffixes[] =
  {
#ifdef USE_ZLIB
    ".gz",
#endif
#ifdef USE_BZLIB
    ".bz2",
#endif
#ifdef USE_LZMA
    ".xz",
#endif
  };
#endif
74
75/* Try to open the given file as it is or under the debuginfo directory.  */
76static int
77try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
78{
79  if (*fname == NULL)
80    return -1;
81
82  /* Don't bother trying *FNAME itself here if the path will cause it to be
83     tried because we give its own basename as DEBUGLINK_FILE.  */
84  int fd = ((((dwfl->callbacks->debuginfo_path
85	       ? *dwfl->callbacks->debuginfo_path : NULL)
86	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
87	    : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
88
89  if (fd < 0)
90    {
91      Dwfl_Module fakemod = { .dwfl = dwfl };
92      /* First try the file's unadorned basename as DEBUGLINK_FILE,
93	 to look for "vmlinux" files.  */
94      fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
95						 *fname, basename (*fname), 0,
96						 &fakemod.debug.name);
97      if (fd < 0 && try_debug)
98	/* Next, let the call use the default of basename + ".debug",
99	   to look for "vmlinux.debug" files.  */
100	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
101						   *fname, NULL, 0,
102						   &fakemod.debug.name);
103      if (fakemod.debug.name != NULL)
104	{
105	  free (*fname);
106	  *fname = fakemod.debug.name;
107	}
108    }
109
110#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
111  if (fd < 0)
112    for (size_t i = 0;
113	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
114	 ++i)
115      {
116	char *zname;
117	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
118	  {
119	    fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
120	    if (fd < 0)
121	      free (zname);
122	    else
123	      {
124		free (*fname);
125		*fname = zname;
126	      }
127	  }
128      }
129#endif
130
131  if (fd < 0)
132    {
133      free (*fname);
134      *fname = NULL;
135    }
136
137  return fd;
138}
139
/* Return the release string of the running kernel, i.e. what
   `uname -r` prints, or NULL if uname fails.  The utsname data is kept
   in static storage and only fetched while the cached release string
   is still empty.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached_uts;

  /* Already looked up by an earlier call.  */
  if (cached_uts.release[0] != '\0')
    return cached_uts.release;

  if (uname (&cached_uts) != 0)
    return NULL;

  return cached_uts.release;
}
149
150static int
151find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
152{
153  if ((release[0] == '/'
154       ? asprintf (fname, "%s/vmlinux", release)
155       : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
156    return -1;
157
158  int fd = try_kernel_name (dwfl, fname, true);
159  if (fd < 0 && release[0] != '/')
160    {
161      free (*fname);
162      if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
163	return -1;
164      fd = try_kernel_name (dwfl, fname, true);
165    }
166
167  return fd;
168}
169
170static int
171get_release (Dwfl *dwfl, const char **release)
172{
173  if (dwfl == NULL)
174    return -1;
175
176  const char *release_string = release == NULL ? NULL : *release;
177  if (release_string == NULL)
178    {
179      release_string = kernel_release ();
180      if (release_string == NULL)
181	return errno;
182      if (release != NULL)
183	*release = release_string;
184    }
185
186  return 0;
187}
188
189static int
190report_kernel (Dwfl *dwfl, const char **release,
191	       int (*predicate) (const char *module, const char *file))
192{
193  int result = get_release (dwfl, release);
194  if (unlikely (result != 0))
195    return result;
196
197  char *fname;
198  int fd = find_kernel_elf (dwfl, *release, &fname);
199
200  if (fd < 0)
201    result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
202	      ? 0 : errno ?: ENOENT);
203  else
204    {
205      bool report = true;
206
207      if (predicate != NULL)
208	{
209	  /* Let the predicate decide whether to use this one.  */
210	  int want = (*predicate) (KERNEL_MODNAME, fname);
211	  if (want < 0)
212	    result = errno;
213	  report = want > 0;
214	}
215
216      if (report)
217	{
218	  /* Note that on some architectures (e.g. x86_64) the vmlinux
219	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
220	     In both cases the phdr p_vaddr load address will be non-zero.
221	     We want the image to be placed as if it was ET_DYN, so
222	     pass true for add_p_vaddr which will do the right thing
223	     (in combination with a zero base) in either case.  */
224	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
225						      fname, fd, 0, true);
226	  if (mod == NULL)
227	    result = -1;
228	  else
229	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
230	    mod->e_type = ET_DYN;
231	}
232
233      free (fname);
234
235      if (!report || result < 0)
236	close (fd);
237    }
238
239  return result;
240}
241
242/* Look for a kernel debug archive.  If we find one, report all its modules.
243   If not, return ENOENT.  */
244static int
245report_kernel_archive (Dwfl *dwfl, const char **release,
246		       int (*predicate) (const char *module, const char *file))
247{
248  int result = get_release (dwfl, release);
249  if (unlikely (result != 0))
250    return result;
251
252  char *archive;
253  int res = (((*release)[0] == '/')
254	     ? asprintf (&archive, "%s/debug.a", *release)
255	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
256  if (unlikely (res < 0))
257    return ENOMEM;
258
259  int fd = try_kernel_name (dwfl, &archive, false);
260  if (fd < 0)
261    result = errno ?: ENOENT;
262  else
263    {
264      /* We have the archive file open!  */
265      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
266						    true, predicate);
267      if (unlikely (last == NULL))
268	result = -1;
269      else
270	{
271	  /* Find the kernel and move it to the head of the list.  */
272	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
273	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
274	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
275	      {
276		*prevp = m->next;
277		m->next = *tailp;
278		*tailp = m;
279		break;
280	      }
281	}
282    }
283
284  free (archive);
285  return result;
286}
287
/* See whether the file name of F ends in one of the known module
   suffixes: ".ko", plus a compressed variant for each decompressor
   enabled at build time.

   If NAMELEN is nonzero, the part before the suffix must be exactly
   NAMELEN bytes long.  If NAMELEN is zero, any nonempty part before the
   suffix will do.

   Returns the length of the matching suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  static const char *const module_suffixes[] =
    {
      ".ko",
#if USE_ZLIB
      ".ko.gz",
#endif
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };
  const size_t nsuffixes = sizeof module_suffixes / sizeof module_suffixes[0];

  for (size_t i = 0; i < nsuffixes; ++i)
    {
      const char *sfx = module_suffixes[i];
      const size_t sfxlen = strlen (sfx);

      /* Check the length first so the comparison below cannot start
	 before the beginning of the name.  */
      if (namelen != 0
	  ? f->fts_namelen != namelen + sfxlen
	  : f->fts_namelen <= sfxlen)
	continue;

      /* Compare the terminating NUL too.  */
      if (memcmp (f->fts_name + f->fts_namelen - sfxlen, sfx, sfxlen + 1) == 0)
	return sfxlen;
    }

  return 0;
}
313
314/* Report a kernel and all its modules found on disk, for offline use.
315   If RELEASE starts with '/', it names a directory to look in;
316   if not, it names a directory to find under /lib/modules/;
317   if null, /lib/modules/`uname -r` is used.
318   Returns zero on success, -1 if dwfl_report_module failed,
319   or an errno code if finding the files on disk failed.  */
320int
321dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
322				  int (*predicate) (const char *module,
323						    const char *file))
324{
325  int result = report_kernel_archive (dwfl, &release, predicate);
326  if (result != ENOENT)
327    return result;
328
329  /* First report the kernel.  */
330  result = report_kernel (dwfl, &release, predicate);
331  if (result == 0)
332    {
333      /* Do "find /lib/modules/RELEASE -name *.ko".  */
334
335      char *modulesdir[] = { NULL, NULL };
336      if (release[0] == '/')
337	modulesdir[0] = (char *) release;
338      else
339	{
340	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
341	    return errno;
342	}
343
344      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
345      if (modulesdir[0] == (char *) release)
346	modulesdir[0] = NULL;
347      if (fts == NULL)
348	{
349	  free (modulesdir[0]);
350	  return errno;
351	}
352
353      FTSENT *f;
354      while ((f = fts_read (fts)) != NULL)
355	{
356	  /* Skip a "source" subtree, which tends to be large.
357	     This insane hard-coding of names is what depmod does too.  */
358	  if (f->fts_namelen == sizeof "source" - 1
359	      && !strcmp (f->fts_name, "source"))
360	    {
361	      fts_set (fts, f, FTS_SKIP);
362	      continue;
363	    }
364
365	  switch (f->fts_info)
366	    {
367	    case FTS_F:
368	    case FTS_SL:
369	    case FTS_NSOK:;
370	      /* See if this file name matches "*.ko".  */
371	      const size_t suffix = check_suffix (f, 0);
372	      if (suffix)
373		{
374		  /* We have a .ko file to report.  Following the algorithm
375		     by which the kernel makefiles set KBUILD_MODNAME, we
376		     replace all ',' or '-' with '_' in the file name and
377		     call that the module name.  Modules could well be
378		     built using different embedded names than their file
379		     names.  To handle that, we would have to look at the
380		     __this_module.name contents in the module's text.  */
381
382		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
383		  if (unlikely (name == NULL))
384		    {
385		      __libdwfl_seterrno (DWFL_E_NOMEM);
386		      result = -1;
387		      break;
388		    }
389		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
390		    if (name[i] == '-' || name[i] == ',')
391		      name[i] = '_';
392
393		  if (predicate != NULL)
394		    {
395		      /* Let the predicate decide whether to use this one.  */
396		      int want = (*predicate) (name, f->fts_path);
397		      if (want < 0)
398			{
399			  result = -1;
400			  free (name);
401			  break;
402			}
403		      if (!want)
404			{
405			  free (name);
406			  continue;
407			}
408		    }
409
410		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
411		    {
412		      free (name);
413		      result = -1;
414		      break;
415		    }
416		  free (name);
417		}
418	      continue;
419
420	    case FTS_ERR:
421	    case FTS_DNR:
422	    case FTS_NS:
423	      result = f->fts_errno;
424	      break;
425
426	    case FTS_SLNONE:
427	    default:
428	      continue;
429	    }
430
431	  /* We only get here in error cases.  */
432	  break;
433	}
434      fts_close (fts);
435      free (modulesdir[0]);
436    }
437
438  return result;
439}
440INTDEF (dwfl_linux_kernel_report_offline)
441
442
/* State of read_address used by intuit_kernel_bounds. */
struct read_address_state {
  FILE *f;		/* Stream the lines are read from.  */
  char *line;		/* Line buffer filled in by getline.  */
  size_t linesz;	/* Allocated size of LINE, maintained by getline.  */
  size_t n;		/* Result of the most recent getline call.  */
  char *p;		/* Parse position in LINE; once read_address has
			   split off the type token it points at the text
			   after that token.  */
  const char *type;	/* The token following the address in LINE.  */
};
452
453static inline bool
454read_address (struct read_address_state *state, Dwarf_Addr *addr)
455{
456  if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
457      state->line[state->n - 2] == ']')
458    return false;
459  *addr = strtoull (state->line, &state->p, 16);
460  state->p += strspn (state->p, " \t");
461  state->type = strsep (&state->p, " \t\n");
462  if (state->type == NULL)
463    return false;
464  return state->p != NULL && state->p != state->line;
465}
466
467
468/* Grovel around to guess the bounds of the runtime kernel image.  */
469static int
470intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
471{
472  struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
473
474  state.f = fopen (KSYMSFILE, "r");
475  if (state.f == NULL)
476    return errno;
477
478  (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
479
480  *notes = 0;
481
482  int result;
483  do
484    result = read_address (&state, start) ? 0 : -1;
485  while (result == 0 && strchr ("TtRr", *state.type) == NULL);
486
487  if (result == 0)
488    {
489      *end = *start;
490      while (read_address (&state, end))
491	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
492	  *notes = *end;
493
494      Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
495      *start &= -(Dwarf_Addr) round_kernel;
496      *end += round_kernel - 1;
497      *end &= -(Dwarf_Addr) round_kernel;
498      if (*start >= *end || *end - *start < round_kernel)
499	result = -1;
500    }
501  free (state.line);
502
503  if (result == -1)
504    result = ferror_unlocked (state.f) ? errno : ENOEXEC;
505
506  fclose (state.f);
507
508  return result;
509}
510
511
512/* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
513static int
514check_notes (Dwfl_Module *mod, const char *notesfile,
515	     Dwarf_Addr vaddr, const char *secname)
516{
517  int fd = open64 (notesfile, O_RDONLY);
518  if (fd < 0)
519    return 1;
520
521  assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
522  assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
523  union
524  {
525    GElf_Nhdr nhdr;
526    unsigned char data[8192];
527  } buf;
528
529  ssize_t n = read (fd, buf.data, sizeof buf);
530  close (fd);
531
532  if (n <= 0)
533    return 1;
534
535  unsigned char *p = buf.data;
536  while (p < &buf.data[n])
537    {
538      /* No translation required since we are reading the native kernel.  */
539      GElf_Nhdr *nhdr = (void *) p;
540      p += sizeof *nhdr;
541      unsigned char *name = p;
542      p += (nhdr->n_namesz + 3) & -4U;
543      unsigned char *bits = p;
544      p += (nhdr->n_descsz + 3) & -4U;
545
546      if (p <= &buf.data[n]
547	  && nhdr->n_type == NT_GNU_BUILD_ID
548	  && nhdr->n_namesz == sizeof "GNU"
549	  && !memcmp (name, "GNU", sizeof "GNU"))
550	{
551	  /* Found it.  For a module we must figure out its VADDR now.  */
552
553	  if (secname != NULL
554	      && (INTUSE(dwfl_linux_kernel_module_section_address)
555		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
556		  || vaddr == (GElf_Addr) -1l))
557	    vaddr = 0;
558
559	  if (vaddr != 0)
560	    vaddr += bits - buf.data;
561	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
562						      nhdr->n_descsz, vaddr);
563	}
564    }
565
566  return 0;
567}
568
569/* Look for a build ID for the kernel.  */
570static int
571check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
572{
573  return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
574}
575
576/* Look for a build ID for a loaded kernel module.  */
577static int
578check_module_notes (Dwfl_Module *mod)
579{
580  char *dirs[2] = { NULL, NULL };
581  if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
582    return ENOMEM;
583
584  FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
585  if (fts == NULL)
586    {
587      free (dirs[0]);
588      return 0;
589    }
590
591  int result = 0;
592  FTSENT *f;
593  while ((f = fts_read (fts)) != NULL)
594    {
595      switch (f->fts_info)
596	{
597	case FTS_F:
598	case FTS_SL:
599	case FTS_NSOK:
600	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
601	  if (result > 0)	/* Nothing found.  */
602	    {
603	      result = 0;
604	      continue;
605	    }
606	  break;
607
608	case FTS_ERR:
609	case FTS_DNR:
610	  result = f->fts_errno;
611	  break;
612
613	case FTS_NS:
614	case FTS_SLNONE:
615	default:
616	  continue;
617	}
618
619      /* We only get here when finished or in error cases.  */
620      break;
621    }
622  fts_close (fts);
623  free (dirs[0]);
624
625  return result;
626}
627
628int
629dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
630{
631  Dwarf_Addr start = 0;
632  Dwarf_Addr end = 0;
633
634  #define report() \
635    (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
636
637  /* This is a bit of a kludge.  If we already reported the kernel,
638     don't bother figuring it out again--it never changes.  */
639  for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
640    if (!strcmp (m->name, KERNEL_MODNAME))
641      {
642	start = m->low_addr;
643	end = m->high_addr;
644	return report () == NULL ? -1 : 0;
645      }
646
647  /* Try to figure out the bounds of the kernel image without
648     looking for any vmlinux file.  */
649  Dwarf_Addr notes;
650  /* The compiler cannot deduce that if intuit_kernel_bounds returns
651     zero NOTES will be initialized.  Fake the initialization.  */
652  asm ("" : "=m" (notes));
653  int result = intuit_kernel_bounds (&start, &end, &notes);
654  if (result == 0)
655    {
656      Dwfl_Module *mod = report ();
657      return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
658    }
659  if (result != ENOENT)
660    return result;
661
662  /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
663  return report_kernel (dwfl, NULL, NULL);
664}
665INTDEF (dwfl_linux_kernel_report_kernel)
666
667
/* Copy MODULE_NAME to ALTERNATE_NAME with every FROM character in its
   first NAMELEN bytes replaced by TO.  The byte at index NAMELEN, the
   terminator of a string that is NAMELEN long, is copied as well, so
   ALTERNATE_NAME must have room for NAMELEN + 1 bytes.

   Returns false, leaving ALTERNATE_NAME untouched, if FROM does not
   occur in the first NAMELEN bytes; returns true otherwise.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  /* Nothing to substitute means there is no alternate name.  */
  if (memchr (module_name, from, namelen) == NULL)
    return false;

  for (size_t i = 0; i < namelen; ++i)
    {
      const char c = module_name[i];
      alternate_name[i] = c == from ? to : c;
    }
  alternate_name[namelen] = module_name[namelen];

  return true;
}
690
691/* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
692
693int
694dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
695			    void **userdata __attribute__ ((unused)),
696			    const char *module_name,
697			    Dwarf_Addr base __attribute__ ((unused)),
698			    char **file_name, Elf **elfp)
699{
700  if (mod->build_id_len > 0)
701    {
702      int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
703					       file_name, elfp);
704      if (fd >= 0 || mod->main.elf != NULL || errno != 0)
705	return fd;
706    }
707
708  const char *release = kernel_release ();
709  if (release == NULL)
710    return errno;
711
712  if (!strcmp (module_name, KERNEL_MODNAME))
713    return find_kernel_elf (mod->dwfl, release, file_name);
714
715  /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
716
717  char *modulesdir[] = { NULL, NULL };
718  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
719    return -1;
720
721  FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
722  if (fts == NULL)
723    {
724      free (modulesdir[0]);
725      return -1;
726    }
727
728  size_t namelen = strlen (module_name);
729
730  /* This is a kludge.  There is no actual necessary relationship between
731     the name of the .ko file installed and the module name the kernel
732     knows it by when it's loaded.  The kernel's only idea of the module
733     name comes from the name embedded in the object's magic
734     .gnu.linkonce.this_module section.
735
736     In practice, these module names match the .ko file names except for
737     some using '_' and some using '-'.  So our cheap kludge is to look for
738     two files when either a '_' or '-' appears in a module name, one using
739     only '_' and one only using '-'.  */
740
741  char *alternate_name = malloc (namelen + 1);
742  if (unlikely (alternate_name == NULL))
743    {
744      free (modulesdir[0]);
745      return ENOMEM;
746    }
747  if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
748      !subst_name ('_', '-', module_name, alternate_name, namelen))
749    alternate_name[0] = '\0';
750
751  FTSENT *f;
752  int error = ENOENT;
753  while ((f = fts_read (fts)) != NULL)
754    {
755      /* Skip a "source" subtree, which tends to be large.
756	 This insane hard-coding of names is what depmod does too.  */
757      if (f->fts_namelen == sizeof "source" - 1
758	  && !strcmp (f->fts_name, "source"))
759	{
760	  fts_set (fts, f, FTS_SKIP);
761	  continue;
762	}
763
764      error = ENOENT;
765      switch (f->fts_info)
766	{
767	case FTS_F:
768	case FTS_SL:
769	case FTS_NSOK:
770	  /* See if this file name is "MODULE_NAME.ko".  */
771	  if (check_suffix (f, namelen)
772	      && (!memcmp (f->fts_name, module_name, namelen)
773		  || !memcmp (f->fts_name, alternate_name, namelen)))
774	    {
775	      int fd = open64 (f->fts_accpath, O_RDONLY);
776	      *file_name = strdup (f->fts_path);
777	      fts_close (fts);
778	      free (modulesdir[0]);
779	      free (alternate_name);
780	      if (fd < 0)
781		free (*file_name);
782	      else if (*file_name == NULL)
783		{
784		  close (fd);
785		  fd = -1;
786		}
787	      return fd;
788	    }
789	  break;
790
791	case FTS_ERR:
792	case FTS_DNR:
793	case FTS_NS:
794	  error = f->fts_errno;
795	  break;
796
797	case FTS_SLNONE:
798	default:
799	  break;
800	}
801    }
802
803  fts_close (fts);
804  free (modulesdir[0]);
805  free (alternate_name);
806  errno = error;
807  return -1;
808}
809INTDEF (dwfl_linux_kernel_find_elf)
810
811
812/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
813   We read the information from /sys/module directly.  */
814
815int
816dwfl_linux_kernel_module_section_address
817(Dwfl_Module *mod __attribute__ ((unused)),
818 void **userdata __attribute__ ((unused)),
819 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
820 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
821 const GElf_Shdr *shdr __attribute__ ((unused)),
822 Dwarf_Addr *addr)
823{
824  char *sysfile;
825  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
826    return DWARF_CB_ABORT;
827
828  FILE *f = fopen (sysfile, "r");
829  free (sysfile);
830
831  if (f == NULL)
832    {
833      if (errno == ENOENT)
834	{
835	  /* The .modinfo and .data.percpu sections are never kept
836	     loaded in the kernel.  If the kernel was compiled without
837	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
838	     actually loaded at all.
839
840	     Setting *ADDR to -1 tells the caller this section is
841	     actually absent from memory.  */
842
843	  if (!strcmp (secname, ".modinfo")
844	      || !strcmp (secname, ".data.percpu")
845	      || !strncmp (secname, ".exit", 5))
846	    {
847	      *addr = (Dwarf_Addr) -1l;
848	      return DWARF_CB_OK;
849	    }
850
851	  /* The goofy PPC64 module_frob_arch_sections function tweaks
852	     the section names as a way to control other kernel code's
853	     behavior, and this cruft leaks out into the /sys information.
854	     The file name for ".init*" may actually look like "_init*".  */
855
856	  const bool is_init = !strncmp (secname, ".init", 5);
857	  if (is_init)
858	    {
859	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
860			    modname, &secname[1]) < 0)
861		return ENOMEM;
862	      f = fopen (sysfile, "r");
863	      free (sysfile);
864	      if (f != NULL)
865		goto ok;
866	    }
867
868	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
869	     In case that size increases in the future, look for longer
870	     truncated names first.  */
871	  size_t namelen = strlen (secname);
872	  if (namelen >= MODULE_SECT_NAME_LEN)
873	    {
874	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
875				  modname, secname);
876	      if (len < 0)
877		return DWARF_CB_ABORT;
878	      char *end = sysfile + len;
879	      do
880		{
881		  *--end = '\0';
882		  f = fopen (sysfile, "r");
883		  if (is_init && f == NULL && errno == ENOENT)
884		    {
885		      sysfile[len - namelen] = '_';
886		      f = fopen (sysfile, "r");
887		      sysfile[len - namelen] = '.';
888		    }
889		}
890	      while (f == NULL && errno == ENOENT
891		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
892	      free (sysfile);
893
894	      if (f != NULL)
895		goto ok;
896	    }
897	}
898
899      return DWARF_CB_ABORT;
900    }
901
902 ok:
903  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
904
905  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
906		: ferror_unlocked (f) ? errno : ENOEXEC);
907  fclose (f);
908
909  if (result == 0)
910    return DWARF_CB_OK;
911
912  errno = result;
913  return DWARF_CB_ABORT;
914}
915INTDEF (dwfl_linux_kernel_module_section_address)
916
917int
918dwfl_linux_kernel_report_modules (Dwfl *dwfl)
919{
920  FILE *f = fopen (MODULELIST, "r");
921  if (f == NULL)
922    return errno;
923
924  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
925
926  int result = 0;
927  Dwarf_Addr modaddr;
928  unsigned long int modsz;
929  char modname[128];
930  char *line = NULL;
931  size_t linesz = 0;
932  /* We can't just use fscanf here because it's not easy to distinguish \n
933     from other whitespace so as to take the optional word following the
934     address but always stop at the end of the line.  */
935  while (getline (&line, &linesz, f) > 0
936	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
937		    modname, &modsz, &modaddr) == 3)
938    {
939      Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
940						     modaddr, modaddr + modsz);
941      if (mod == NULL)
942	{
943	  result = -1;
944	  break;
945	}
946
947      result = check_module_notes (mod);
948    }
949  free (line);
950
951  if (result == 0)
952    result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
953
954  fclose (f);
955
956  return result;
957}
958INTDEF (dwfl_linux_kernel_report_modules)
959