linux-kernel-modules.c revision dc6526a51317dbaf812df26108da614703050770
1/* Standard libdwfl callbacks for debugging the running Linux kernel.
2   Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29/* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30   Everything we need here is fine if its declarations just come first.  */
31/* Some makefiles, e.g. HOST_linux-x86.mk, predefine _FILE_OFFSET_BITS.  */
32#undef _FILE_OFFSET_BITS
33#include <fts.h>
34
35#include <config.h>
36
37#include "libdwflP.h"
38#include <inttypes.h>
39#include <errno.h>
40#include <stdio.h>
41#include <stdio_ext.h>
42#include <string.h>
43#include <stdlib.h>
44#include <sys/utsname.h>
45#include <fcntl.h>
46#include <unistd.h>
47
48/* Since fts.h is included before config.h, its indirect inclusions may not
49   give us the right LFS aliases of these functions, so map them manually.  */
50#ifdef _FILE_OFFSET_BITS
51#define open open64
52#define fopen fopen64
53#endif
54
55
56#define KERNEL_MODNAME	"kernel"
57
58#define MODULEDIRFMT	"/lib/modules/%s"
59
60#define KNOTESFILE	"/sys/kernel/notes"
61#define	MODNOTESFMT	"/sys/module/%s/notes"
62#define KSYMSFILE	"/proc/kallsyms"
63#define MODULELIST	"/proc/modules"
64#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
65#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
66
67
68#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
69static const char *vmlinux_suffixes[] =
70  {
71#ifdef USE_ZLIB
72    ".gz",
73#endif
74#ifdef USE_BZLIB
75    ".bz2",
76#endif
77#ifdef USE_LZMA
78    ".xz",
79#endif
80  };
81#endif
82
83/* Try to open the given file as it is or under the debuginfo directory.  */
84static int
85try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
86{
87  if (*fname == NULL)
88    return -1;
89
90  /* Don't bother trying *FNAME itself here if the path will cause it to be
91     tried because we give its own basename as DEBUGLINK_FILE.  */
92  int fd = ((((dwfl->callbacks->debuginfo_path
93	       ? *dwfl->callbacks->debuginfo_path : NULL)
94	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
95	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
96
97  if (fd < 0)
98    {
99      Dwfl_Module fakemod = { .dwfl = dwfl };
100      /* First try the file's unadorned basename as DEBUGLINK_FILE,
101	 to look for "vmlinux" files.  */
102      fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
103						 *fname, basename (*fname), 0,
104						 &fakemod.debug.name);
105      if (fd < 0 && try_debug)
106	/* Next, let the call use the default of basename + ".debug",
107	   to look for "vmlinux.debug" files.  */
108	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
109						   *fname, NULL, 0,
110						   &fakemod.debug.name);
111      if (fakemod.debug.name != NULL)
112	{
113	  free (*fname);
114	  *fname = fakemod.debug.name;
115	}
116    }
117
118#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
119  if (fd < 0)
120    for (size_t i = 0;
121	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
122	 ++i)
123      {
124	char *zname;
125	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
126	  {
127	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
128	    if (fd < 0)
129	      free (zname);
130	    else
131	      {
132		free (*fname);
133		*fname = zname;
134	      }
135	  }
136      }
137#endif
138
139  if (fd < 0)
140    {
141      free (*fname);
142      *fname = NULL;
143    }
144
145  return fd;
146}
147
/* Return the release string of the running kernel as reported by uname,
   or NULL if uname fails.  The result lives in static storage and is
   reused by later calls.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached;

  /* A non-empty release field means an earlier call already filled it.  */
  if (cached.release[0] != '\0')
    return cached.release;

  if (uname (&cached) != 0)
    return NULL;

  return cached.release;
}
157
158static int
159find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
160{
161  if ((release[0] == '/'
162       ? asprintf (fname, "%s/vmlinux", release)
163       : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
164    return -1;
165
166  int fd = try_kernel_name (dwfl, fname, true);
167  if (fd < 0 && release[0] != '/')
168    {
169      free (*fname);
170      if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
171	return -1;
172      fd = try_kernel_name (dwfl, fname, true);
173    }
174
175  return fd;
176}
177
178static int
179get_release (Dwfl *dwfl, const char **release)
180{
181  if (dwfl == NULL)
182    return -1;
183
184  const char *release_string = release == NULL ? NULL : *release;
185  if (release_string == NULL)
186    {
187      release_string = kernel_release ();
188      if (release_string == NULL)
189	return errno;
190      if (release != NULL)
191	*release = release_string;
192    }
193
194  return 0;
195}
196
197static int
198report_kernel (Dwfl *dwfl, const char **release,
199	       int (*predicate) (const char *module, const char *file))
200{
201  int result = get_release (dwfl, release);
202  if (unlikely (result != 0))
203    return result;
204
205  char *fname;
206  int fd = find_kernel_elf (dwfl, *release, &fname);
207
208  if (fd < 0)
209    result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
210	      ? 0 : errno ?: ENOENT);
211  else
212    {
213      bool report = true;
214
215      if (predicate != NULL)
216	{
217	  /* Let the predicate decide whether to use this one.  */
218	  int want = (*predicate) (KERNEL_MODNAME, fname);
219	  if (want < 0)
220	    result = errno;
221	  report = want > 0;
222	}
223
224      if (report)
225	{
226	  /* Note that on some architectures (e.g. x86_64) the vmlinux
227	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
228	     In both cases the phdr p_vaddr load address will be non-zero.
229	     We want the image to be placed as if it was ET_DYN, so
230	     pass true for add_p_vaddr which will do the right thing
231	     (in combination with a zero base) in either case.  */
232	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
233						      fname, fd, 0, true);
234	  if (mod == NULL)
235	    result = -1;
236	  else
237	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
238	    mod->e_type = ET_DYN;
239	}
240
241      free (fname);
242
243      if (!report || result < 0)
244	close (fd);
245    }
246
247  return result;
248}
249
250/* Look for a kernel debug archive.  If we find one, report all its modules.
251   If not, return ENOENT.  */
252static int
253report_kernel_archive (Dwfl *dwfl, const char **release,
254		       int (*predicate) (const char *module, const char *file))
255{
256  int result = get_release (dwfl, release);
257  if (unlikely (result != 0))
258    return result;
259
260  char *archive;
261  int res = (((*release)[0] == '/')
262	     ? asprintf (&archive, "%s/debug.a", *release)
263	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
264  if (unlikely (res < 0))
265    return ENOMEM;
266
267  int fd = try_kernel_name (dwfl, &archive, false);
268  if (fd < 0)
269    result = errno ?: ENOENT;
270  else
271    {
272      /* We have the archive file open!  */
273      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
274						    true, predicate);
275      if (unlikely (last == NULL))
276	result = -1;
277      else
278	{
279	  /* Find the kernel and move it to the head of the list.  */
280	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
281	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
282	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
283	      {
284		*prevp = m->next;
285		m->next = *tailp;
286		*tailp = m;
287		break;
288	      }
289	}
290    }
291
292  free (archive);
293  return result;
294}
295
/* Check whether the name of F ends in a recognized kernel module suffix.

   If NAMELEN is nonzero, the name must be exactly NAMELEN characters
   followed by the suffix.  If NAMELEN is zero, any name with at least one
   character before the suffix qualifies.  Returns the length of the
   matching suffix, or zero if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  /* Candidate suffixes, tried in this order.  */
  static const char *const suffixes[] =
    {
      ".ko",
#if USE_ZLIB
      ".ko.gz",
#endif
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };

  for (size_t i = 0; i < sizeof suffixes / sizeof suffixes[0]; ++i)
    {
      const size_t sfxlen = strlen (suffixes[i]);
      const int length_ok = (namelen != 0
			     ? f->fts_namelen == namelen + sfxlen
			     : f->fts_namelen > sfxlen);

      /* Compare the terminating NUL too, so the suffix must end the name.  */
      if (length_ok
	  && memcmp (f->fts_name + f->fts_namelen - sfxlen,
		     suffixes[i], sfxlen + 1) == 0)
	return sfxlen;
    }

  return 0;
}
321
322/* Report a kernel and all its modules found on disk, for offline use.
323   If RELEASE starts with '/', it names a directory to look in;
324   if not, it names a directory to find under /lib/modules/;
325   if null, /lib/modules/`uname -r` is used.
326   Returns zero on success, -1 if dwfl_report_module failed,
327   or an errno code if finding the files on disk failed.  */
328int
329dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
330				  int (*predicate) (const char *module,
331						    const char *file))
332{
333  int result = report_kernel_archive (dwfl, &release, predicate);
334  if (result != ENOENT)
335    return result;
336
337  /* First report the kernel.  */
338  result = report_kernel (dwfl, &release, predicate);
339  if (result == 0)
340    {
341      /* Do "find /lib/modules/RELEASE -name *.ko".  */
342
343      char *modulesdir[] = { NULL, NULL };
344      if (release[0] == '/')
345	modulesdir[0] = (char *) release;
346      else
347	{
348	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
349	    return errno;
350	}
351
352      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
353      if (modulesdir[0] == (char *) release)
354	modulesdir[0] = NULL;
355      if (fts == NULL)
356	{
357	  free (modulesdir[0]);
358	  return errno;
359	}
360
361      FTSENT *f;
362      while ((f = fts_read (fts)) != NULL)
363	{
364	  /* Skip a "source" subtree, which tends to be large.
365	     This insane hard-coding of names is what depmod does too.  */
366	  if (f->fts_namelen == sizeof "source" - 1
367	      && !strcmp (f->fts_name, "source"))
368	    {
369	      fts_set (fts, f, FTS_SKIP);
370	      continue;
371	    }
372
373	  switch (f->fts_info)
374	    {
375	    case FTS_F:
376	    case FTS_SL:
377	    case FTS_NSOK:;
378	      /* See if this file name matches "*.ko".  */
379	      const size_t suffix = check_suffix (f, 0);
380	      if (suffix)
381		{
382		  /* We have a .ko file to report.  Following the algorithm
383		     by which the kernel makefiles set KBUILD_MODNAME, we
384		     replace all ',' or '-' with '_' in the file name and
385		     call that the module name.  Modules could well be
386		     built using different embedded names than their file
387		     names.  To handle that, we would have to look at the
388		     __this_module.name contents in the module's text.  */
389
390		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
391		  if (unlikely (name == NULL))
392		    {
393		      __libdwfl_seterrno (DWFL_E_NOMEM);
394		      result = -1;
395		      break;
396		    }
397		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
398		    if (name[i] == '-' || name[i] == ',')
399		      name[i] = '_';
400
401		  if (predicate != NULL)
402		    {
403		      /* Let the predicate decide whether to use this one.  */
404		      int want = (*predicate) (name, f->fts_path);
405		      if (want < 0)
406			{
407			  result = -1;
408			  free (name);
409			  break;
410			}
411		      if (!want)
412			{
413			  free (name);
414			  continue;
415			}
416		    }
417
418		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
419		    {
420		      free (name);
421		      result = -1;
422		      break;
423		    }
424		  free (name);
425		}
426	      continue;
427
428	    case FTS_ERR:
429	    case FTS_DNR:
430	    case FTS_NS:
431	      result = f->fts_errno;
432	      break;
433
434	    case FTS_SLNONE:
435	    default:
436	      continue;
437	    }
438
439	  /* We only get here in error cases.  */
440	  break;
441	}
442      fts_close (fts);
443      free (modulesdir[0]);
444    }
445
446  return result;
447}
448INTDEF (dwfl_linux_kernel_report_offline)
449
450
451/* State of read_address used by intuit_kernel_bounds. */
452struct read_address_state {
453  FILE *f;
454  char *line;
455  size_t linesz;
456  size_t n;
457  char *p;
458  const char *type;
459};
460
461static inline bool
462read_address (struct read_address_state *state, Dwarf_Addr *addr)
463{
464  if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
465      state->line[state->n - 2] == ']')
466    return false;
467  *addr = strtoull (state->line, &state->p, 16);
468  state->p += strspn (state->p, " \t");
469  state->type = strsep (&state->p, " \t\n");
470  if (state->type == NULL)
471    return false;
472  return state->p != NULL && state->p != state->line;
473}
474
475
476/* Grovel around to guess the bounds of the runtime kernel image.  */
477static int
478intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
479{
480  struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
481
482  state.f = fopen (KSYMSFILE, "r");
483  if (state.f == NULL)
484    return errno;
485
486  (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
487
488  *notes = 0;
489
490  int result;
491  do
492    result = read_address (&state, start) ? 0 : -1;
493  while (result == 0 && strchr ("TtRr", *state.type) == NULL);
494
495  if (result == 0)
496    {
497      *end = *start;
498      while (read_address (&state, end))
499	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
500	  *notes = *end;
501
502      Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
503      *start &= -(Dwarf_Addr) round_kernel;
504      *end += round_kernel - 1;
505      *end &= -(Dwarf_Addr) round_kernel;
506      if (*start >= *end || *end - *start < round_kernel)
507	result = -1;
508    }
509  free (state.line);
510
511  if (result == -1)
512    result = ferror_unlocked (state.f) ? errno : ENOEXEC;
513
514  fclose (state.f);
515
516  return result;
517}
518
519
520/* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
521static int
522check_notes (Dwfl_Module *mod, const char *notesfile,
523	     Dwarf_Addr vaddr, const char *secname)
524{
525  int fd = open (notesfile, O_RDONLY);
526  if (fd < 0)
527    return 1;
528
529  assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
530  assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
531  union
532  {
533    GElf_Nhdr nhdr;
534    unsigned char data[8192];
535  } buf;
536
537  ssize_t n = read (fd, buf.data, sizeof buf);
538  close (fd);
539
540  if (n <= 0)
541    return 1;
542
543  unsigned char *p = buf.data;
544  while (p < &buf.data[n])
545    {
546      /* No translation required since we are reading the native kernel.  */
547      GElf_Nhdr *nhdr = (void *) p;
548      p += sizeof *nhdr;
549      unsigned char *name = p;
550      p += (nhdr->n_namesz + 3) & -4U;
551      unsigned char *bits = p;
552      p += (nhdr->n_descsz + 3) & -4U;
553
554      if (p <= &buf.data[n]
555	  && nhdr->n_type == NT_GNU_BUILD_ID
556	  && nhdr->n_namesz == sizeof "GNU"
557	  && !memcmp (name, "GNU", sizeof "GNU"))
558	{
559	  /* Found it.  For a module we must figure out its VADDR now.  */
560
561	  if (secname != NULL
562	      && (INTUSE(dwfl_linux_kernel_module_section_address)
563		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
564		  || vaddr == (GElf_Addr) -1l))
565	    vaddr = 0;
566
567	  if (vaddr != 0)
568	    vaddr += bits - buf.data;
569	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
570						      nhdr->n_descsz, vaddr);
571	}
572    }
573
574  return 0;
575}
576
577/* Look for a build ID for the kernel.  */
578static int
579check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
580{
581  return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
582}
583
584/* Look for a build ID for a loaded kernel module.  */
585static int
586check_module_notes (Dwfl_Module *mod)
587{
588  char *dirs[2] = { NULL, NULL };
589  if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
590    return ENOMEM;
591
592  FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
593  if (fts == NULL)
594    {
595      free (dirs[0]);
596      return 0;
597    }
598
599  int result = 0;
600  FTSENT *f;
601  while ((f = fts_read (fts)) != NULL)
602    {
603      switch (f->fts_info)
604	{
605	case FTS_F:
606	case FTS_SL:
607	case FTS_NSOK:
608	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
609	  if (result > 0)	/* Nothing found.  */
610	    {
611	      result = 0;
612	      continue;
613	    }
614	  break;
615
616	case FTS_ERR:
617	case FTS_DNR:
618	  result = f->fts_errno;
619	  break;
620
621	case FTS_NS:
622	case FTS_SLNONE:
623	default:
624	  continue;
625	}
626
627      /* We only get here when finished or in error cases.  */
628      break;
629    }
630  fts_close (fts);
631  free (dirs[0]);
632
633  return result;
634}
635
636int
637dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
638{
639  Dwarf_Addr start = 0;
640  Dwarf_Addr end = 0;
641
642  #define report() \
643    (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
644
645  /* This is a bit of a kludge.  If we already reported the kernel,
646     don't bother figuring it out again--it never changes.  */
647  for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
648    if (!strcmp (m->name, KERNEL_MODNAME))
649      {
650	start = m->low_addr;
651	end = m->high_addr;
652	return report () == NULL ? -1 : 0;
653      }
654
655  /* Try to figure out the bounds of the kernel image without
656     looking for any vmlinux file.  */
657  Dwarf_Addr notes;
658  /* The compiler cannot deduce that if intuit_kernel_bounds returns
659     zero NOTES will be initialized.  Fake the initialization.  */
660  asm ("" : "=m" (notes));
661  int result = intuit_kernel_bounds (&start, &end, &notes);
662  if (result == 0)
663    {
664      Dwfl_Module *mod = report ();
665      return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
666    }
667  if (result != ENOENT)
668    return result;
669
670  /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
671  return report_kernel (dwfl, NULL, NULL);
672}
673INTDEF (dwfl_linux_kernel_report_kernel)
674
675
/* Copy MODULE_NAME into ALTERNATE_NAME with every FROM character among
   its first NAMELEN bytes replaced by TO.  The byte at index NAMELEN
   (the terminator, when NAMELEN is the string length) is copied too, so
   ALTERNATE_NAME must have room for NAMELEN + 1 bytes.  Returns false,
   leaving ALTERNATE_NAME untouched, if FROM does not occur.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  if (memchr (module_name, from, namelen) == NULL)
    return false;

  for (size_t i = 0; i < namelen; ++i)
    alternate_name[i] = module_name[i] == from ? to : module_name[i];
  alternate_name[namelen] = module_name[namelen];

  return true;
}
698
699/* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
700
701int
702dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
703			    void **userdata __attribute__ ((unused)),
704			    const char *module_name,
705			    Dwarf_Addr base __attribute__ ((unused)),
706			    char **file_name, Elf **elfp)
707{
708  if (mod->build_id_len > 0)
709    {
710      int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
711					       file_name, elfp);
712      if (fd >= 0 || mod->main.elf != NULL || errno != 0)
713	return fd;
714    }
715
716  const char *release = kernel_release ();
717  if (release == NULL)
718    return errno;
719
720  if (!strcmp (module_name, KERNEL_MODNAME))
721    return find_kernel_elf (mod->dwfl, release, file_name);
722
723  /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
724
725  char *modulesdir[] = { NULL, NULL };
726  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
727    return -1;
728
729  FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
730  if (fts == NULL)
731    {
732      free (modulesdir[0]);
733      return -1;
734    }
735
736  size_t namelen = strlen (module_name);
737
738  /* This is a kludge.  There is no actual necessary relationship between
739     the name of the .ko file installed and the module name the kernel
740     knows it by when it's loaded.  The kernel's only idea of the module
741     name comes from the name embedded in the object's magic
742     .gnu.linkonce.this_module section.
743
744     In practice, these module names match the .ko file names except for
745     some using '_' and some using '-'.  So our cheap kludge is to look for
746     two files when either a '_' or '-' appears in a module name, one using
747     only '_' and one only using '-'.  */
748
749  char *alternate_name = malloc (namelen + 1);
750  if (unlikely (alternate_name == NULL))
751    {
752      free (modulesdir[0]);
753      return ENOMEM;
754    }
755  if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
756      !subst_name ('_', '-', module_name, alternate_name, namelen))
757    alternate_name[0] = '\0';
758
759  FTSENT *f;
760  int error = ENOENT;
761  while ((f = fts_read (fts)) != NULL)
762    {
763      /* Skip a "source" subtree, which tends to be large.
764	 This insane hard-coding of names is what depmod does too.  */
765      if (f->fts_namelen == sizeof "source" - 1
766	  && !strcmp (f->fts_name, "source"))
767	{
768	  fts_set (fts, f, FTS_SKIP);
769	  continue;
770	}
771
772      error = ENOENT;
773      switch (f->fts_info)
774	{
775	case FTS_F:
776	case FTS_SL:
777	case FTS_NSOK:
778	  /* See if this file name is "MODULE_NAME.ko".  */
779	  if (check_suffix (f, namelen)
780	      && (!memcmp (f->fts_name, module_name, namelen)
781		  || !memcmp (f->fts_name, alternate_name, namelen)))
782	    {
783	      int fd = open (f->fts_accpath, O_RDONLY);
784	      *file_name = strdup (f->fts_path);
785	      fts_close (fts);
786	      free (modulesdir[0]);
787	      free (alternate_name);
788	      if (fd < 0)
789		free (*file_name);
790	      else if (*file_name == NULL)
791		{
792		  close (fd);
793		  fd = -1;
794		}
795	      return fd;
796	    }
797	  break;
798
799	case FTS_ERR:
800	case FTS_DNR:
801	case FTS_NS:
802	  error = f->fts_errno;
803	  break;
804
805	case FTS_SLNONE:
806	default:
807	  break;
808	}
809    }
810
811  fts_close (fts);
812  free (modulesdir[0]);
813  free (alternate_name);
814  errno = error;
815  return -1;
816}
817INTDEF (dwfl_linux_kernel_find_elf)
818
819
820/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
821   We read the information from /sys/module directly.  */
822
823int
824dwfl_linux_kernel_module_section_address
825(Dwfl_Module *mod __attribute__ ((unused)),
826 void **userdata __attribute__ ((unused)),
827 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
828 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
829 const GElf_Shdr *shdr __attribute__ ((unused)),
830 Dwarf_Addr *addr)
831{
832  char *sysfile;
833  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
834    return DWARF_CB_ABORT;
835
836  FILE *f = fopen (sysfile, "r");
837  free (sysfile);
838
839  if (f == NULL)
840    {
841      if (errno == ENOENT)
842	{
843	  /* The .modinfo and .data.percpu sections are never kept
844	     loaded in the kernel.  If the kernel was compiled without
845	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
846	     actually loaded at all.
847
848	     Setting *ADDR to -1 tells the caller this section is
849	     actually absent from memory.  */
850
851	  if (!strcmp (secname, ".modinfo")
852	      || !strcmp (secname, ".data.percpu")
853	      || !strncmp (secname, ".exit", 5))
854	    {
855	      *addr = (Dwarf_Addr) -1l;
856	      return DWARF_CB_OK;
857	    }
858
859	  /* The goofy PPC64 module_frob_arch_sections function tweaks
860	     the section names as a way to control other kernel code's
861	     behavior, and this cruft leaks out into the /sys information.
862	     The file name for ".init*" may actually look like "_init*".  */
863
864	  const bool is_init = !strncmp (secname, ".init", 5);
865	  if (is_init)
866	    {
867	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
868			    modname, &secname[1]) < 0)
869		return ENOMEM;
870	      f = fopen (sysfile, "r");
871	      free (sysfile);
872	      if (f != NULL)
873		goto ok;
874	    }
875
876	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
877	     In case that size increases in the future, look for longer
878	     truncated names first.  */
879	  size_t namelen = strlen (secname);
880	  if (namelen >= MODULE_SECT_NAME_LEN)
881	    {
882	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
883				  modname, secname);
884	      if (len < 0)
885		return DWARF_CB_ABORT;
886	      char *end = sysfile + len;
887	      do
888		{
889		  *--end = '\0';
890		  f = fopen (sysfile, "r");
891		  if (is_init && f == NULL && errno == ENOENT)
892		    {
893		      sysfile[len - namelen] = '_';
894		      f = fopen (sysfile, "r");
895		      sysfile[len - namelen] = '.';
896		    }
897		}
898	      while (f == NULL && errno == ENOENT
899		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
900	      free (sysfile);
901
902	      if (f != NULL)
903		goto ok;
904	    }
905	}
906
907      return DWARF_CB_ABORT;
908    }
909
910 ok:
911  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
912
913  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
914		: ferror_unlocked (f) ? errno : ENOEXEC);
915  fclose (f);
916
917  if (result == 0)
918    return DWARF_CB_OK;
919
920  errno = result;
921  return DWARF_CB_ABORT;
922}
923INTDEF (dwfl_linux_kernel_module_section_address)
924
925int
926dwfl_linux_kernel_report_modules (Dwfl *dwfl)
927{
928  FILE *f = fopen (MODULELIST, "r");
929  if (f == NULL)
930    return errno;
931
932  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
933
934  int result = 0;
935  Dwarf_Addr modaddr;
936  unsigned long int modsz;
937  char modname[128];
938  char *line = NULL;
939  size_t linesz = 0;
940  /* We can't just use fscanf here because it's not easy to distinguish \n
941     from other whitespace so as to take the optional word following the
942     address but always stop at the end of the line.  */
943  while (getline (&line, &linesz, f) > 0
944	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
945		    modname, &modsz, &modaddr) == 3)
946    {
947      Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
948						     modaddr, modaddr + modsz);
949      if (mod == NULL)
950	{
951	  result = -1;
952	  break;
953	}
954
955      result = check_module_notes (mod);
956    }
957  free (line);
958
959  if (result == 0)
960    result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
961
962  fclose (f);
963
964  return result;
965}
966INTDEF (dwfl_linux_kernel_report_modules)
967