/* Standard libdwfl callbacks for debugging the running Linux kernel.
   Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
   This file is part of elfutils.

   This file is free software; you can redistribute it and/or modify
   it under the terms of either

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at
       your option) any later version

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at
       your option) any later version

   or both in parallel, as here.

   elfutils is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see <http://www.gnu.org/licenses/>.  */
28
/* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
   Everything we need here is fine if its declarations just come first.  */
/* Some makefiles, e.g. HOST_linux-x86.mk, predefine _FILE_OFFSET_BITS.  */
#undef _FILE_OFFSET_BITS
#include <fts.h>

#include <config.h>

#include "libdwflP.h"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <string.h>
#include <sys/utsname.h>
#include <unistd.h>

/* Since fts.h is included before config.h, its indirect inclusions may not
   give us the right LFS aliases of these functions, so map them manually.  */
#ifdef _FILE_OFFSET_BITS
#define open open64
#define fopen fopen64
#endif
54
55
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME "kernel"

/* Format of a release's module directory; the argument is the release.  */
#define MODULEDIRFMT "/lib/modules/%s"

/* Files and formats under /sys and /proc that this file reads.  The
   formats with a %s take a module name.  */
#define KNOTESFILE "/sys/kernel/notes"
#define MODNOTESFMT "/sys/module/%s/notes"
#define KSYMSFILE "/proc/kallsyms"
#define MODULELIST "/proc/modules"
#define SECADDRDIRFMT "/sys/module/%s/sections/"

/* Minimum any linux/module.h has had.  */
#define MODULE_SECT_NAME_LEN 32
66
67
68static const char *vmlinux_suffixes[] =
69  {
70    ".gz",
71#ifdef USE_BZLIB
72    ".bz2",
73#endif
74#ifdef USE_LZMA
75    ".xz",
76#endif
77  };
78
79/* Try to open the given file as it is or under the debuginfo directory.  */
80static int
81try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
82{
83  if (*fname == NULL)
84    return -1;
85
86  /* Don't bother trying *FNAME itself here if the path will cause it to be
87     tried because we give its own basename as DEBUGLINK_FILE.  */
88  int fd = ((((dwfl->callbacks->debuginfo_path
89	       ? *dwfl->callbacks->debuginfo_path : NULL)
90	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
91	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
92
93  if (fd < 0)
94    {
95      Dwfl_Module fakemod = { .dwfl = dwfl };
96      /* First try the file's unadorned basename as DEBUGLINK_FILE,
97	 to look for "vmlinux" files.  */
98      fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
99						 *fname, basename (*fname), 0,
100						 &fakemod.debug.name);
101      if (fd < 0 && try_debug)
102	/* Next, let the call use the default of basename + ".debug",
103	   to look for "vmlinux.debug" files.  */
104	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
105						   *fname, NULL, 0,
106						   &fakemod.debug.name);
107      if (fakemod.debug.name != NULL)
108	{
109	  free (*fname);
110	  *fname = fakemod.debug.name;
111	}
112    }
113
114  if (fd < 0)
115    for (size_t i = 0;
116	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
117	 ++i)
118      {
119	char *zname;
120	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
121	  {
122	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
123	    if (fd < 0)
124	      free (zname);
125	    else
126	      {
127		free (*fname);
128		*fname = zname;
129	      }
130	  }
131      }
132
133  if (fd < 0)
134    {
135      free (*fname);
136      *fname = NULL;
137    }
138
139  return fd;
140}
141
/* Return the `uname -r` string of the running kernel, or NULL if uname
   fails.  The result lives in static storage and is reused by later
   calls.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached;

  /* A non-empty release field means an earlier call already filled it.  */
  if (cached.release[0] != '\0')
    return cached.release;

  if (uname (&cached) != 0)
    return NULL;

  return cached.release;
}
151
152static int
153find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
154{
155  if ((release[0] == '/'
156       ? asprintf (fname, "%s/vmlinux", release)
157       : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
158    return -1;
159
160  int fd = try_kernel_name (dwfl, fname, true);
161  if (fd < 0 && release[0] != '/')
162    {
163      free (*fname);
164      if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
165	return -1;
166      fd = try_kernel_name (dwfl, fname, true);
167    }
168
169  return fd;
170}
171
172static int
173get_release (Dwfl *dwfl, const char **release)
174{
175  if (dwfl == NULL)
176    return -1;
177
178  const char *release_string = release == NULL ? NULL : *release;
179  if (release_string == NULL)
180    {
181      release_string = kernel_release ();
182      if (release_string == NULL)
183	return errno;
184      if (release != NULL)
185	*release = release_string;
186    }
187
188  return 0;
189}
190
191static int
192report_kernel (Dwfl *dwfl, const char **release,
193	       int (*predicate) (const char *module, const char *file))
194{
195  int result = get_release (dwfl, release);
196  if (unlikely (result != 0))
197    return result;
198
199  char *fname;
200  int fd = find_kernel_elf (dwfl, *release, &fname);
201
202  if (fd < 0)
203    result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
204	      ? 0 : errno ?: ENOENT);
205  else
206    {
207      bool report = true;
208
209      if (predicate != NULL)
210	{
211	  /* Let the predicate decide whether to use this one.  */
212	  int want = (*predicate) (KERNEL_MODNAME, fname);
213	  if (want < 0)
214	    result = errno;
215	  report = want > 0;
216	}
217
218      if (report)
219	{
220	  /* Note that on some architectures (e.g. x86_64) the vmlinux
221	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
222	     In both cases the phdr p_vaddr load address will be non-zero.
223	     We want the image to be placed as if it was ET_DYN, so
224	     pass true for add_p_vaddr which will do the right thing
225	     (in combination with a zero base) in either case.  */
226	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
227						      fname, fd, 0, true);
228	  if (mod == NULL)
229	    result = -1;
230	  else
231	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
232	    mod->e_type = ET_DYN;
233	}
234
235      free (fname);
236
237      if (!report || result < 0)
238	close (fd);
239    }
240
241  return result;
242}
243
244/* Look for a kernel debug archive.  If we find one, report all its modules.
245   If not, return ENOENT.  */
246static int
247report_kernel_archive (Dwfl *dwfl, const char **release,
248		       int (*predicate) (const char *module, const char *file))
249{
250  int result = get_release (dwfl, release);
251  if (unlikely (result != 0))
252    return result;
253
254  char *archive;
255  int res = (((*release)[0] == '/')
256	     ? asprintf (&archive, "%s/debug.a", *release)
257	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
258  if (unlikely (res < 0))
259    return ENOMEM;
260
261  int fd = try_kernel_name (dwfl, &archive, false);
262  if (fd < 0)
263    result = errno ?: ENOENT;
264  else
265    {
266      /* We have the archive file open!  */
267      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
268						    true, predicate);
269      if (unlikely (last == NULL))
270	result = -1;
271      else
272	{
273	  /* Find the kernel and move it to the head of the list.  */
274	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
275	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
276	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
277	      {
278		*prevp = m->next;
279		m->next = *tailp;
280		*tailp = m;
281		break;
282	      }
283	}
284    }
285
286  free (archive);
287  return result;
288}
289
/* Check whether the name of F ends in one of the module file suffixes.

   If NAMELEN is nonzero, the name must be exactly NAMELEN characters
   followed by the suffix.  If it is zero, the name only needs at least
   one character before the suffix.  Returns the length of the matching
   suffix, or zero if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  /* Candidates in the order they are tried.  */
  static const char *const suffixes[] =
    {
      ".ko",
      ".ko.gz",
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };

  for (size_t i = 0; i < sizeof suffixes / sizeof suffixes[0]; ++i)
    {
      const char *sfx = suffixes[i];
      const size_t len = strlen (sfx);

      const int length_ok = (namelen != 0
                             ? f->fts_namelen == namelen + len
                             : f->fts_namelen > len);

      /* Compare the terminating NUL as well, as the tail of the name
         is what is being matched.  */
      if (length_ok
          && memcmp (f->fts_name + f->fts_namelen - len, sfx, len + 1) == 0)
        return len;
    }

  return 0;
}
313
314/* Report a kernel and all its modules found on disk, for offline use.
315   If RELEASE starts with '/', it names a directory to look in;
316   if not, it names a directory to find under /lib/modules/;
317   if null, /lib/modules/`uname -r` is used.
318   Returns zero on success, -1 if dwfl_report_module failed,
319   or an errno code if finding the files on disk failed.  */
320int
321dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
322				  int (*predicate) (const char *module,
323						    const char *file))
324{
325  int result = report_kernel_archive (dwfl, &release, predicate);
326  if (result != ENOENT)
327    return result;
328
329  /* First report the kernel.  */
330  result = report_kernel (dwfl, &release, predicate);
331  if (result == 0)
332    {
333      /* Do "find /lib/modules/RELEASE -name *.ko".  */
334
335      char *modulesdir[] = { NULL, NULL };
336      if (release[0] == '/')
337	modulesdir[0] = (char *) release;
338      else
339	{
340	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
341	    return errno;
342	}
343
344      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
345      if (modulesdir[0] == (char *) release)
346	modulesdir[0] = NULL;
347      if (fts == NULL)
348	{
349	  free (modulesdir[0]);
350	  return errno;
351	}
352
353      FTSENT *f;
354      while ((f = fts_read (fts)) != NULL)
355	{
356	  /* Skip a "source" subtree, which tends to be large.
357	     This insane hard-coding of names is what depmod does too.  */
358	  if (f->fts_namelen == sizeof "source" - 1
359	      && !strcmp (f->fts_name, "source"))
360	    {
361	      fts_set (fts, f, FTS_SKIP);
362	      continue;
363	    }
364
365	  switch (f->fts_info)
366	    {
367	    case FTS_F:
368	    case FTS_SL:
369	    case FTS_NSOK:;
370	      /* See if this file name matches "*.ko".  */
371	      const size_t suffix = check_suffix (f, 0);
372	      if (suffix)
373		{
374		  /* We have a .ko file to report.  Following the algorithm
375		     by which the kernel makefiles set KBUILD_MODNAME, we
376		     replace all ',' or '-' with '_' in the file name and
377		     call that the module name.  Modules could well be
378		     built using different embedded names than their file
379		     names.  To handle that, we would have to look at the
380		     __this_module.name contents in the module's text.  */
381
382		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
383		  if (unlikely (name == NULL))
384		    {
385		      __libdwfl_seterrno (DWFL_E_NOMEM);
386		      result = -1;
387		      break;
388		    }
389		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
390		    if (name[i] == '-' || name[i] == ',')
391		      name[i] = '_';
392
393		  if (predicate != NULL)
394		    {
395		      /* Let the predicate decide whether to use this one.  */
396		      int want = (*predicate) (name, f->fts_path);
397		      if (want < 0)
398			{
399			  result = -1;
400			  free (name);
401			  break;
402			}
403		      if (!want)
404			{
405			  free (name);
406			  continue;
407			}
408		    }
409
410		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
411		    {
412		      free (name);
413		      result = -1;
414		      break;
415		    }
416		  free (name);
417		}
418	      continue;
419
420	    case FTS_ERR:
421	    case FTS_DNR:
422	    case FTS_NS:
423	      result = f->fts_errno;
424	      break;
425
426	    case FTS_SLNONE:
427	    default:
428	      continue;
429	    }
430
431	  /* We only get here in error cases.  */
432	  break;
433	}
434      fts_close (fts);
435      free (modulesdir[0]);
436    }
437
438  return result;
439}
440INTDEF (dwfl_linux_kernel_report_offline)
441
442
443/* State of read_address used by intuit_kernel_bounds. */
444struct read_address_state {
445  FILE *f;
446  char *line;
447  size_t linesz;
448  size_t n;
449  char *p;
450  const char *type;
451};
452
453static inline bool
454read_address (struct read_address_state *state, Dwarf_Addr *addr)
455{
456  if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
457      state->line[state->n - 2] == ']')
458    return false;
459  *addr = strtoull (state->line, &state->p, 16);
460  state->p += strspn (state->p, " \t");
461  state->type = strsep (&state->p, " \t\n");
462  if (state->type == NULL)
463    return false;
464  return state->p != NULL && state->p != state->line;
465}
466
467
468/* Grovel around to guess the bounds of the runtime kernel image.  */
469static int
470intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
471{
472  struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
473
474  state.f = fopen (KSYMSFILE, "r");
475  if (state.f == NULL)
476    return errno;
477
478  (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
479
480  *notes = 0;
481
482  int result;
483  do
484    result = read_address (&state, start) ? 0 : -1;
485  while (result == 0 && strchr ("TtRr", *state.type) == NULL);
486
487  if (result == 0)
488    {
489      *end = *start;
490      while (read_address (&state, end))
491	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
492	  *notes = *end;
493
494      Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
495      *start &= -(Dwarf_Addr) round_kernel;
496      *end += round_kernel - 1;
497      *end &= -(Dwarf_Addr) round_kernel;
498      if (*start >= *end || *end - *start < round_kernel)
499	result = -1;
500    }
501  free (state.line);
502
503  if (result == -1)
504    result = ferror_unlocked (state.f) ? errno : ENOEXEC;
505
506  fclose (state.f);
507
508  return result;
509}
510
511
512/* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
513static int
514check_notes (Dwfl_Module *mod, const char *notesfile,
515	     Dwarf_Addr vaddr, const char *secname)
516{
517  int fd = open (notesfile, O_RDONLY);
518  if (fd < 0)
519    return 1;
520
521  assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
522  assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
523  union
524  {
525    GElf_Nhdr nhdr;
526    unsigned char data[8192];
527  } buf;
528
529  ssize_t n = read (fd, buf.data, sizeof buf);
530  close (fd);
531
532  if (n <= 0)
533    return 1;
534
535  unsigned char *p = buf.data;
536  while (p < &buf.data[n])
537    {
538      /* No translation required since we are reading the native kernel.  */
539      GElf_Nhdr *nhdr = (void *) p;
540      p += sizeof *nhdr;
541      unsigned char *name = p;
542      p += (nhdr->n_namesz + 3) & -4U;
543      unsigned char *bits = p;
544      p += (nhdr->n_descsz + 3) & -4U;
545
546      if (p <= &buf.data[n]
547	  && nhdr->n_type == NT_GNU_BUILD_ID
548	  && nhdr->n_namesz == sizeof "GNU"
549	  && !memcmp (name, "GNU", sizeof "GNU"))
550	{
551	  /* Found it.  For a module we must figure out its VADDR now.  */
552
553	  if (secname != NULL
554	      && (INTUSE(dwfl_linux_kernel_module_section_address)
555		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
556		  || vaddr == (GElf_Addr) -1l))
557	    vaddr = 0;
558
559	  if (vaddr != 0)
560	    vaddr += bits - buf.data;
561	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
562						      nhdr->n_descsz, vaddr);
563	}
564    }
565
566  return 0;
567}
568
569/* Look for a build ID for the kernel.  */
570static int
571check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
572{
573  return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
574}
575
576/* Look for a build ID for a loaded kernel module.  */
577static int
578check_module_notes (Dwfl_Module *mod)
579{
580  char *dirs[2] = { NULL, NULL };
581  if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
582    return ENOMEM;
583
584  FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
585  if (fts == NULL)
586    {
587      free (dirs[0]);
588      return 0;
589    }
590
591  int result = 0;
592  FTSENT *f;
593  while ((f = fts_read (fts)) != NULL)
594    {
595      switch (f->fts_info)
596	{
597	case FTS_F:
598	case FTS_SL:
599	case FTS_NSOK:
600	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
601	  if (result > 0)	/* Nothing found.  */
602	    {
603	      result = 0;
604	      continue;
605	    }
606	  break;
607
608	case FTS_ERR:
609	case FTS_DNR:
610	  result = f->fts_errno;
611	  break;
612
613	case FTS_NS:
614	case FTS_SLNONE:
615	default:
616	  continue;
617	}
618
619      /* We only get here when finished or in error cases.  */
620      break;
621    }
622  fts_close (fts);
623  free (dirs[0]);
624
625  return result;
626}
627
628int
629dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
630{
631  Dwarf_Addr start = 0;
632  Dwarf_Addr end = 0;
633
634  #define report() \
635    (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
636
637  /* This is a bit of a kludge.  If we already reported the kernel,
638     don't bother figuring it out again--it never changes.  */
639  for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
640    if (!strcmp (m->name, KERNEL_MODNAME))
641      {
642	start = m->low_addr;
643	end = m->high_addr;
644	return report () == NULL ? -1 : 0;
645      }
646
647  /* Try to figure out the bounds of the kernel image without
648     looking for any vmlinux file.  */
649  Dwarf_Addr notes;
650  /* The compiler cannot deduce that if intuit_kernel_bounds returns
651     zero NOTES will be initialized.  Fake the initialization.  */
652  asm ("" : "=m" (notes));
653  int result = intuit_kernel_bounds (&start, &end, &notes);
654  if (result == 0)
655    {
656      Dwfl_Module *mod = report ();
657      return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
658    }
659  if (result != ENOENT)
660    return result;
661
662  /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
663  return report_kernel (dwfl, NULL, NULL);
664}
665INTDEF (dwfl_linux_kernel_report_kernel)
666
667
/* Copy MODULE_NAME to ALTERNATE_NAME with every FROM replaced by TO.

   NAMELEN is the number of characters to translate; the byte that
   follows them in MODULE_NAME is copied unchanged, so ALTERNATE_NAME
   must have room for NAMELEN + 1 bytes.  Returns false, leaving
   ALTERNATE_NAME untouched, if FROM does not occur in the first NAMELEN
   bytes.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  if (memchr (module_name, from, namelen) == NULL)
    return false;

  for (size_t i = 0; i < namelen; ++i)
    {
      const char c = module_name[i];
      alternate_name[i] = c == from ? to : c;
    }

  /* Carry over the byte after the name (the terminator).  */
  alternate_name[namelen] = module_name[namelen];
  return true;
}
690
691/* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
692
693int
694dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
695			    void **userdata __attribute__ ((unused)),
696			    const char *module_name,
697			    Dwarf_Addr base __attribute__ ((unused)),
698			    char **file_name, Elf **elfp)
699{
700  if (mod->build_id_len > 0)
701    {
702      int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
703					       file_name, elfp);
704      if (fd >= 0 || mod->main.elf != NULL || errno != 0)
705	return fd;
706    }
707
708  const char *release = kernel_release ();
709  if (release == NULL)
710    return errno;
711
712  if (!strcmp (module_name, KERNEL_MODNAME))
713    return find_kernel_elf (mod->dwfl, release, file_name);
714
715  /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
716
717  char *modulesdir[] = { NULL, NULL };
718  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
719    return -1;
720
721  FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
722  if (fts == NULL)
723    {
724      free (modulesdir[0]);
725      return -1;
726    }
727
728  size_t namelen = strlen (module_name);
729
730  /* This is a kludge.  There is no actual necessary relationship between
731     the name of the .ko file installed and the module name the kernel
732     knows it by when it's loaded.  The kernel's only idea of the module
733     name comes from the name embedded in the object's magic
734     .gnu.linkonce.this_module section.
735
736     In practice, these module names match the .ko file names except for
737     some using '_' and some using '-'.  So our cheap kludge is to look for
738     two files when either a '_' or '-' appears in a module name, one using
739     only '_' and one only using '-'.  */
740
741  char *alternate_name = malloc (namelen + 1);
742  if (unlikely (alternate_name == NULL))
743    {
744      free (modulesdir[0]);
745      return ENOMEM;
746    }
747  if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
748      !subst_name ('_', '-', module_name, alternate_name, namelen))
749    alternate_name[0] = '\0';
750
751  FTSENT *f;
752  int error = ENOENT;
753  while ((f = fts_read (fts)) != NULL)
754    {
755      /* Skip a "source" subtree, which tends to be large.
756	 This insane hard-coding of names is what depmod does too.  */
757      if (f->fts_namelen == sizeof "source" - 1
758	  && !strcmp (f->fts_name, "source"))
759	{
760	  fts_set (fts, f, FTS_SKIP);
761	  continue;
762	}
763
764      error = ENOENT;
765      switch (f->fts_info)
766	{
767	case FTS_F:
768	case FTS_SL:
769	case FTS_NSOK:
770	  /* See if this file name is "MODULE_NAME.ko".  */
771	  if (check_suffix (f, namelen)
772	      && (!memcmp (f->fts_name, module_name, namelen)
773		  || !memcmp (f->fts_name, alternate_name, namelen)))
774	    {
775	      int fd = open (f->fts_accpath, O_RDONLY);
776	      *file_name = strdup (f->fts_path);
777	      fts_close (fts);
778	      free (modulesdir[0]);
779	      free (alternate_name);
780	      if (fd < 0)
781		free (*file_name);
782	      else if (*file_name == NULL)
783		{
784		  close (fd);
785		  fd = -1;
786		}
787	      return fd;
788	    }
789	  break;
790
791	case FTS_ERR:
792	case FTS_DNR:
793	case FTS_NS:
794	  error = f->fts_errno;
795	  break;
796
797	case FTS_SLNONE:
798	default:
799	  break;
800	}
801    }
802
803  fts_close (fts);
804  free (modulesdir[0]);
805  free (alternate_name);
806  errno = error;
807  return -1;
808}
809INTDEF (dwfl_linux_kernel_find_elf)
810
811
812/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
813   We read the information from /sys/module directly.  */
814
815int
816dwfl_linux_kernel_module_section_address
817(Dwfl_Module *mod __attribute__ ((unused)),
818 void **userdata __attribute__ ((unused)),
819 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
820 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
821 const GElf_Shdr *shdr __attribute__ ((unused)),
822 Dwarf_Addr *addr)
823{
824  char *sysfile;
825  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
826    return DWARF_CB_ABORT;
827
828  FILE *f = fopen (sysfile, "r");
829  free (sysfile);
830
831  if (f == NULL)
832    {
833      if (errno == ENOENT)
834	{
835	  /* The .modinfo and .data.percpu sections are never kept
836	     loaded in the kernel.  If the kernel was compiled without
837	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
838	     actually loaded at all.
839
840	     Setting *ADDR to -1 tells the caller this section is
841	     actually absent from memory.  */
842
843	  if (!strcmp (secname, ".modinfo")
844	      || !strcmp (secname, ".data.percpu")
845	      || !strncmp (secname, ".exit", 5))
846	    {
847	      *addr = (Dwarf_Addr) -1l;
848	      return DWARF_CB_OK;
849	    }
850
851	  /* The goofy PPC64 module_frob_arch_sections function tweaks
852	     the section names as a way to control other kernel code's
853	     behavior, and this cruft leaks out into the /sys information.
854	     The file name for ".init*" may actually look like "_init*".  */
855
856	  const bool is_init = !strncmp (secname, ".init", 5);
857	  if (is_init)
858	    {
859	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
860			    modname, &secname[1]) < 0)
861		return ENOMEM;
862	      f = fopen (sysfile, "r");
863	      free (sysfile);
864	      if (f != NULL)
865		goto ok;
866	    }
867
868	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
869	     In case that size increases in the future, look for longer
870	     truncated names first.  */
871	  size_t namelen = strlen (secname);
872	  if (namelen >= MODULE_SECT_NAME_LEN)
873	    {
874	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
875				  modname, secname);
876	      if (len < 0)
877		return DWARF_CB_ABORT;
878	      char *end = sysfile + len;
879	      do
880		{
881		  *--end = '\0';
882		  f = fopen (sysfile, "r");
883		  if (is_init && f == NULL && errno == ENOENT)
884		    {
885		      sysfile[len - namelen] = '_';
886		      f = fopen (sysfile, "r");
887		      sysfile[len - namelen] = '.';
888		    }
889		}
890	      while (f == NULL && errno == ENOENT
891		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
892	      free (sysfile);
893
894	      if (f != NULL)
895		goto ok;
896	    }
897	}
898
899      return DWARF_CB_ABORT;
900    }
901
902 ok:
903  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
904
905  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
906		: ferror_unlocked (f) ? errno : ENOEXEC);
907  fclose (f);
908
909  if (result == 0)
910    return DWARF_CB_OK;
911
912  errno = result;
913  return DWARF_CB_ABORT;
914}
915INTDEF (dwfl_linux_kernel_module_section_address)
916
917int
918dwfl_linux_kernel_report_modules (Dwfl *dwfl)
919{
920  FILE *f = fopen (MODULELIST, "r");
921  if (f == NULL)
922    return errno;
923
924  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
925
926  int result = 0;
927  Dwarf_Addr modaddr;
928  unsigned long int modsz;
929  char modname[128];
930  char *line = NULL;
931  size_t linesz = 0;
932  /* We can't just use fscanf here because it's not easy to distinguish \n
933     from other whitespace so as to take the optional word following the
934     address but always stop at the end of the line.  */
935  while (getline (&line, &linesz, f) > 0
936	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
937		    modname, &modsz, &modaddr) == 3)
938    {
939      Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
940						     modaddr, modaddr + modsz);
941      if (mod == NULL)
942	{
943	  result = -1;
944	  break;
945	}
946
947      result = check_module_notes (mod);
948    }
949  free (line);
950
951  if (result == 0)
952    result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
953
954  fclose (f);
955
956  return result;
957}
958INTDEF (dwfl_linux_kernel_report_modules)
959