1/* Standard libdwfl callbacks for debugging the running Linux kernel.
2   Copyright (C) 2005-2011, 2013, 2014 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29/* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30   Everything we need here is fine if its declarations just come first.  */
31
32#undef _FILE_OFFSET_BITS  // Undo the damage caused by AndroidConfig.h.
33#include <fts.h>
34
35#include <config.h>
36
37#include "libdwflP.h"
38#include <inttypes.h>
39#include <errno.h>
40#include <stdio.h>
41#include <stdio_ext.h>
42#include <string.h>
43#include <stdlib.h>
44#include <sys/utsname.h>
45#include <fcntl.h>
46#include <unistd.h>
47
48
49#define KERNEL_MODNAME	"kernel"
50
51#define MODULEDIRFMT	"/lib/modules/%s"
52
53#define KNOTESFILE	"/sys/kernel/notes"
54#define	MODNOTESFMT	"/sys/module/%s/notes"
55#define KSYMSFILE	"/proc/kallsyms"
56#define MODULELIST	"/proc/modules"
57#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
58#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
59
60
61#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
62static const char *vmlinux_suffixes[] =
63  {
64#ifdef USE_ZLIB
65    ".gz",
66#endif
67#ifdef USE_BZLIB
68    ".bz2",
69#endif
70#ifdef USE_LZMA
71    ".xz",
72#endif
73  };
74#endif
75
76/* Try to open the given file as it is or under the debuginfo directory.  */
77static int
78try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
79{
80  if (*fname == NULL)
81    return -1;
82
83  /* Don't bother trying *FNAME itself here if the path will cause it to be
84     tried because we give its own basename as DEBUGLINK_FILE.  */
85  int fd = ((((dwfl->callbacks->debuginfo_path
86	       ? *dwfl->callbacks->debuginfo_path : NULL)
87	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
88	    : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
89
90  if (fd < 0)
91    {
92      Dwfl_Module fakemod = { .dwfl = dwfl };
93      /* First try the file's unadorned basename as DEBUGLINK_FILE,
94	 to look for "vmlinux" files.  */
95      fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
96						 *fname, basename (*fname), 0,
97						 &fakemod.debug.name);
98      if (fd < 0 && try_debug)
99	/* Next, let the call use the default of basename + ".debug",
100	   to look for "vmlinux.debug" files.  */
101	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
102						   *fname, NULL, 0,
103						   &fakemod.debug.name);
104      if (fakemod.debug.name != NULL)
105	{
106	  free (*fname);
107	  *fname = fakemod.debug.name;
108	}
109    }
110
111#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
112  if (fd < 0)
113    for (size_t i = 0;
114	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
115	 ++i)
116      {
117	char *zname;
118	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
119	  {
120	    fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
121	    if (fd < 0)
122	      free (zname);
123	    else
124	      {
125		free (*fname);
126		*fname = zname;
127	      }
128	  }
129      }
130#endif
131
132  if (fd < 0)
133    {
134      free (*fname);
135      *fname = NULL;
136    }
137
138  return fd;
139}
140
/* Return the `uname -r` string of the running kernel, or NULL if uname
   fails.  The result lives in static storage and is fetched only once.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached;

  /* A non-empty release field means an earlier call already succeeded.  */
  if (cached.release[0] != '\0')
    return cached.release;

  if (uname (&cached) != 0)
    return NULL;

  return cached.release;
}
150
151static int
152find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
153{
154  if ((release[0] == '/'
155       ? asprintf (fname, "%s/vmlinux", release)
156       : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
157    return -1;
158
159  int fd = try_kernel_name (dwfl, fname, true);
160  if (fd < 0 && release[0] != '/')
161    {
162      free (*fname);
163      if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
164	return -1;
165      fd = try_kernel_name (dwfl, fname, true);
166    }
167
168  return fd;
169}
170
171static int
172get_release (Dwfl *dwfl, const char **release)
173{
174  if (dwfl == NULL)
175    return -1;
176
177  const char *release_string = release == NULL ? NULL : *release;
178  if (release_string == NULL)
179    {
180      release_string = kernel_release ();
181      if (release_string == NULL)
182	return errno;
183      if (release != NULL)
184	*release = release_string;
185    }
186
187  return 0;
188}
189
190static int
191report_kernel (Dwfl *dwfl, const char **release,
192	       int (*predicate) (const char *module, const char *file))
193{
194  int result = get_release (dwfl, release);
195  if (unlikely (result != 0))
196    return result;
197
198  char *fname;
199  int fd = find_kernel_elf (dwfl, *release, &fname);
200
201  if (fd < 0)
202    result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
203	      ? 0 : errno ?: ENOENT);
204  else
205    {
206      bool report = true;
207
208      if (predicate != NULL)
209	{
210	  /* Let the predicate decide whether to use this one.  */
211	  int want = (*predicate) (KERNEL_MODNAME, fname);
212	  if (want < 0)
213	    result = errno;
214	  report = want > 0;
215	}
216
217      if (report)
218	{
219	  /* Note that on some architectures (e.g. x86_64) the vmlinux
220	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
221	     In both cases the phdr p_vaddr load address will be non-zero.
222	     We want the image to be placed as if it was ET_DYN, so
223	     pass true for add_p_vaddr which will do the right thing
224	     (in combination with a zero base) in either case.  */
225	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
226						      fname, fd, 0, true);
227	  if (mod == NULL)
228	    result = -1;
229	  else
230	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
231	    mod->e_type = ET_DYN;
232	}
233
234      free (fname);
235
236      if (!report || result < 0)
237	close (fd);
238    }
239
240  return result;
241}
242
243/* Look for a kernel debug archive.  If we find one, report all its modules.
244   If not, return ENOENT.  */
245static int
246report_kernel_archive (Dwfl *dwfl, const char **release,
247		       int (*predicate) (const char *module, const char *file))
248{
249  int result = get_release (dwfl, release);
250  if (unlikely (result != 0))
251    return result;
252
253  char *archive;
254  int res = (((*release)[0] == '/')
255	     ? asprintf (&archive, "%s/debug.a", *release)
256	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
257  if (unlikely (res < 0))
258    return ENOMEM;
259
260  int fd = try_kernel_name (dwfl, &archive, false);
261  if (fd < 0)
262    result = errno ?: ENOENT;
263  else
264    {
265      /* We have the archive file open!  */
266      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
267						    true, predicate);
268      if (unlikely (last == NULL))
269	result = -1;
270      else
271	{
272	  /* Find the kernel and move it to the head of the list.  */
273	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
274	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
275	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
276	      {
277		*prevp = m->next;
278		m->next = *tailp;
279		*tailp = m;
280		break;
281	      }
282	}
283    }
284
285  free (archive);
286  return result;
287}
288
/* Check whether the name of F ends in one of the recognised kernel
   module suffixes (".ko", plus compressed variants depending on the
   build configuration).

   If NAMELEN is nonzero, the name must consist of exactly NAMELEN
   characters followed by the suffix.  If NAMELEN is zero, any name with
   at least one character before the suffix is accepted.

   Returns the length of the matching suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  static const char *const suffixes[] =
    {
      ".ko",
#if USE_ZLIB
      ".ko.gz",
#endif
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };

  for (size_t i = 0; i < sizeof suffixes / sizeof suffixes[0]; ++i)
    {
      const char *sfx = suffixes[i];
      const size_t sfxlen = strlen (sfx);

      if (namelen != 0)
	{
	  if (f->fts_namelen != namelen + sfxlen)
	    continue;
	}
      else if (f->fts_namelen <= sfxlen)
	continue;

      /* Compare the terminating NUL as well.  */
      if (memcmp (f->fts_name + f->fts_namelen - sfxlen, sfx, sfxlen + 1) == 0)
	return sfxlen;
    }

  return 0;
}
314
315/* Report a kernel and all its modules found on disk, for offline use.
316   If RELEASE starts with '/', it names a directory to look in;
317   if not, it names a directory to find under /lib/modules/;
318   if null, /lib/modules/`uname -r` is used.
319   Returns zero on success, -1 if dwfl_report_module failed,
320   or an errno code if finding the files on disk failed.  */
321int
322dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
323				  int (*predicate) (const char *module,
324						    const char *file))
325{
326  int result = report_kernel_archive (dwfl, &release, predicate);
327  if (result != ENOENT)
328    return result;
329
330  /* First report the kernel.  */
331  result = report_kernel (dwfl, &release, predicate);
332  if (result == 0)
333    {
334      /* Do "find /lib/modules/RELEASE -name *.ko".  */
335
336      char *modulesdir[] = { NULL, NULL };
337      if (release[0] == '/')
338	modulesdir[0] = (char *) release;
339      else
340	{
341	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
342	    return errno;
343	}
344
345      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
346      if (modulesdir[0] == (char *) release)
347	modulesdir[0] = NULL;
348      if (fts == NULL)
349	{
350	  free (modulesdir[0]);
351	  return errno;
352	}
353
354      FTSENT *f;
355      while ((f = fts_read (fts)) != NULL)
356	{
357	  /* Skip a "source" subtree, which tends to be large.
358	     This insane hard-coding of names is what depmod does too.  */
359	  if (f->fts_namelen == sizeof "source" - 1
360	      && !strcmp (f->fts_name, "source"))
361	    {
362	      fts_set (fts, f, FTS_SKIP);
363	      continue;
364	    }
365
366	  switch (f->fts_info)
367	    {
368	    case FTS_F:
369	    case FTS_SL:
370	    case FTS_NSOK:;
371	      /* See if this file name matches "*.ko".  */
372	      const size_t suffix = check_suffix (f, 0);
373	      if (suffix)
374		{
375		  /* We have a .ko file to report.  Following the algorithm
376		     by which the kernel makefiles set KBUILD_MODNAME, we
377		     replace all ',' or '-' with '_' in the file name and
378		     call that the module name.  Modules could well be
379		     built using different embedded names than their file
380		     names.  To handle that, we would have to look at the
381		     __this_module.name contents in the module's text.  */
382
383		  char name[f->fts_namelen - suffix + 1];
384		  for (size_t i = 0; i < f->fts_namelen - 3U; ++i)
385		    if (f->fts_name[i] == '-' || f->fts_name[i] == ',')
386		      name[i] = '_';
387		    else
388		      name[i] = f->fts_name[i];
389		  name[f->fts_namelen - suffix] = '\0';
390
391		  if (predicate != NULL)
392		    {
393		      /* Let the predicate decide whether to use this one.  */
394		      int want = (*predicate) (name, f->fts_path);
395		      if (want < 0)
396			{
397			  result = -1;
398			  break;
399			}
400		      if (!want)
401			continue;
402		    }
403
404		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
405		    {
406		      result = -1;
407		      break;
408		    }
409		}
410	      continue;
411
412	    case FTS_ERR:
413	    case FTS_DNR:
414	    case FTS_NS:
415	      result = f->fts_errno;
416	      break;
417
418	    case FTS_SLNONE:
419	    default:
420	      continue;
421	    }
422
423	  /* We only get here in error cases.  */
424	  break;
425	}
426      fts_close (fts);
427      free (modulesdir[0]);
428    }
429
430  return result;
431}
432INTDEF (dwfl_linux_kernel_report_offline)
433
434
435/* Grovel around to guess the bounds of the runtime kernel image.  */
436static int
437intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
438{
439  FILE *f = fopen (KSYMSFILE, "r");
440  if (f == NULL)
441    return errno;
442
443  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
444
445  *notes = 0;
446
447  char *line = NULL;
448  size_t linesz = 0;
449  size_t n;
450  char *p = NULL;
451  const char *type;
452
453  inline bool read_address (Dwarf_Addr *addr)
454  {
455    if ((n = getline (&line, &linesz, f)) < 1 || line[n - 2] == ']')
456      return false;
457    *addr = strtoull (line, &p, 16);
458    p += strspn (p, " \t");
459    type = strsep (&p, " \t\n");
460    if (type == NULL)
461      return false;
462    return p != NULL && p != line;
463  }
464
465  int result;
466  do
467    result = read_address (start) ? 0 : -1;
468  while (result == 0 && strchr ("TtRr", *type) == NULL);
469
470  if (result == 0)
471    {
472      *end = *start;
473      while (read_address (end))
474	if (*notes == 0 && !strcmp (p, "__start_notes\n"))
475	  *notes = *end;
476
477      Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
478      *start &= -(Dwarf_Addr) round_kernel;
479      *end += round_kernel - 1;
480      *end &= -(Dwarf_Addr) round_kernel;
481      if (*start >= *end || *end - *start < round_kernel)
482	result = -1;
483    }
484  free (line);
485
486  if (result == -1)
487    result = ferror_unlocked (f) ? errno : ENOEXEC;
488
489  fclose (f);
490
491  return result;
492}
493
494
495/* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
496static int
497check_notes (Dwfl_Module *mod, const char *notesfile,
498	     Dwarf_Addr vaddr, const char *secname)
499{
500  int fd = open64 (notesfile, O_RDONLY);
501  if (fd < 0)
502    return 1;
503
504  assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
505  assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
506  union
507  {
508    GElf_Nhdr nhdr;
509    unsigned char data[8192];
510  } buf;
511
512  ssize_t n = read (fd, buf.data, sizeof buf);
513  close (fd);
514
515  if (n <= 0)
516    return 1;
517
518  unsigned char *p = buf.data;
519  while (p < &buf.data[n])
520    {
521      /* No translation required since we are reading the native kernel.  */
522      GElf_Nhdr *nhdr = (void *) p;
523      p += sizeof *nhdr;
524      unsigned char *name = p;
525      p += (nhdr->n_namesz + 3) & -4U;
526      unsigned char *bits = p;
527      p += (nhdr->n_descsz + 3) & -4U;
528
529      if (p <= &buf.data[n]
530	  && nhdr->n_type == NT_GNU_BUILD_ID
531	  && nhdr->n_namesz == sizeof "GNU"
532	  && !memcmp (name, "GNU", sizeof "GNU"))
533	{
534	  /* Found it.  For a module we must figure out its VADDR now.  */
535
536	  if (secname != NULL
537	      && (INTUSE(dwfl_linux_kernel_module_section_address)
538		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
539		  || vaddr == (GElf_Addr) -1l))
540	    vaddr = 0;
541
542	  if (vaddr != 0)
543	    vaddr += bits - buf.data;
544	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
545						      nhdr->n_descsz, vaddr);
546	}
547    }
548
549  return 0;
550}
551
552/* Look for a build ID for the kernel.  */
553static int
554check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
555{
556  return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
557}
558
559/* Look for a build ID for a loaded kernel module.  */
560static int
561check_module_notes (Dwfl_Module *mod)
562{
563  char *dirs[2] = { NULL, NULL };
564  if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
565    return ENOMEM;
566
567  FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
568  if (fts == NULL)
569    {
570      free (dirs[0]);
571      return 0;
572    }
573
574  int result = 0;
575  FTSENT *f;
576  while ((f = fts_read (fts)) != NULL)
577    {
578      switch (f->fts_info)
579	{
580	case FTS_F:
581	case FTS_SL:
582	case FTS_NSOK:
583	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
584	  if (result > 0)	/* Nothing found.  */
585	    {
586	      result = 0;
587	      continue;
588	    }
589	  break;
590
591	case FTS_ERR:
592	case FTS_DNR:
593	  result = f->fts_errno;
594	  break;
595
596	case FTS_NS:
597	case FTS_SLNONE:
598	default:
599	  continue;
600	}
601
602      /* We only get here when finished or in error cases.  */
603      break;
604    }
605  fts_close (fts);
606  free (dirs[0]);
607
608  return result;
609}
610
611int
612dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
613{
614  Dwarf_Addr start;
615  Dwarf_Addr end;
616  inline Dwfl_Module *report (void)
617    {
618      return INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end);
619    }
620
621  /* This is a bit of a kludge.  If we already reported the kernel,
622     don't bother figuring it out again--it never changes.  */
623  for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
624    if (!strcmp (m->name, KERNEL_MODNAME))
625      {
626	start = m->low_addr;
627	end = m->high_addr;
628	return report () == NULL ? -1 : 0;
629      }
630
631  /* Try to figure out the bounds of the kernel image without
632     looking for any vmlinux file.  */
633  Dwarf_Addr notes;
634  /* The compiler cannot deduce that if intuit_kernel_bounds returns
635     zero NOTES will be initialized.  Fake the initialization.  */
636  asm ("" : "=m" (notes));
637  int result = intuit_kernel_bounds (&start, &end, &notes);
638  if (result == 0)
639    {
640      Dwfl_Module *mod = report ();
641      return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
642    }
643  if (result != ENOENT)
644    return result;
645
646  /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
647  return report_kernel (dwfl, NULL, NULL);
648}
649INTDEF (dwfl_linux_kernel_report_kernel)
650
651
652/* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
653
654int
655dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
656			    void **userdata __attribute__ ((unused)),
657			    const char *module_name,
658			    Dwarf_Addr base __attribute__ ((unused)),
659			    char **file_name, Elf **elfp)
660{
661  if (mod->build_id_len > 0)
662    {
663      int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
664					       file_name, elfp);
665      if (fd >= 0 || mod->main.elf != NULL || errno != 0)
666	return fd;
667    }
668
669  const char *release = kernel_release ();
670  if (release == NULL)
671    return errno;
672
673  if (!strcmp (module_name, KERNEL_MODNAME))
674    return find_kernel_elf (mod->dwfl, release, file_name);
675
676  /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
677
678  char *modulesdir[] = { NULL, NULL };
679  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
680    return -1;
681
682  FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
683  if (fts == NULL)
684    {
685      free (modulesdir[0]);
686      return -1;
687    }
688
689  size_t namelen = strlen (module_name);
690
691  /* This is a kludge.  There is no actual necessary relationship between
692     the name of the .ko file installed and the module name the kernel
693     knows it by when it's loaded.  The kernel's only idea of the module
694     name comes from the name embedded in the object's magic
695     .gnu.linkonce.this_module section.
696
697     In practice, these module names match the .ko file names except for
698     some using '_' and some using '-'.  So our cheap kludge is to look for
699     two files when either a '_' or '-' appears in a module name, one using
700     only '_' and one only using '-'.  */
701
702  char alternate_name[namelen + 1];
703  inline bool subst_name (char from, char to)
704    {
705      const char *n = memchr (module_name, from, namelen);
706      if (n == NULL)
707	return false;
708      char *a = mempcpy (alternate_name, module_name, n - module_name);
709      *a++ = to;
710      ++n;
711      const char *p;
712      while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
713	{
714	  a = mempcpy (a, n, p - n);
715	  *a++ = to;
716	  n = p + 1;
717	}
718      memcpy (a, n, namelen - (n - module_name) + 1);
719      return true;
720    }
721  if (!subst_name ('-', '_') && !subst_name ('_', '-'))
722    alternate_name[0] = '\0';
723
724  FTSENT *f;
725  int error = ENOENT;
726  while ((f = fts_read (fts)) != NULL)
727    {
728      /* Skip a "source" subtree, which tends to be large.
729	 This insane hard-coding of names is what depmod does too.  */
730      if (f->fts_namelen == sizeof "source" - 1
731	  && !strcmp (f->fts_name, "source"))
732	{
733	  fts_set (fts, f, FTS_SKIP);
734	  continue;
735	}
736
737      error = ENOENT;
738      switch (f->fts_info)
739	{
740	case FTS_F:
741	case FTS_SL:
742	case FTS_NSOK:
743	  /* See if this file name is "MODULE_NAME.ko".  */
744	  if (check_suffix (f, namelen)
745	      && (!memcmp (f->fts_name, module_name, namelen)
746		  || !memcmp (f->fts_name, alternate_name, namelen)))
747	    {
748	      int fd = open64 (f->fts_accpath, O_RDONLY);
749	      *file_name = strdup (f->fts_path);
750	      fts_close (fts);
751	      free (modulesdir[0]);
752	      if (fd < 0)
753		free (*file_name);
754	      else if (*file_name == NULL)
755		{
756		  close (fd);
757		  fd = -1;
758		}
759	      return fd;
760	    }
761	  break;
762
763	case FTS_ERR:
764	case FTS_DNR:
765	case FTS_NS:
766	  error = f->fts_errno;
767	  break;
768
769	case FTS_SLNONE:
770	default:
771	  break;
772	}
773    }
774
775  fts_close (fts);
776  free (modulesdir[0]);
777  errno = error;
778  return -1;
779}
780INTDEF (dwfl_linux_kernel_find_elf)
781
782
783/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
784   We read the information from /sys/module directly.  */
785
786int
787dwfl_linux_kernel_module_section_address
788(Dwfl_Module *mod __attribute__ ((unused)),
789 void **userdata __attribute__ ((unused)),
790 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
791 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
792 const GElf_Shdr *shdr __attribute__ ((unused)),
793 Dwarf_Addr *addr)
794{
795  char *sysfile;
796  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
797    return DWARF_CB_ABORT;
798
799  FILE *f = fopen (sysfile, "r");
800  free (sysfile);
801
802  if (f == NULL)
803    {
804      if (errno == ENOENT)
805	{
806	  /* The .modinfo and .data.percpu sections are never kept
807	     loaded in the kernel.  If the kernel was compiled without
808	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
809	     actually loaded at all.
810
811	     Setting *ADDR to -1 tells the caller this section is
812	     actually absent from memory.  */
813
814	  if (!strcmp (secname, ".modinfo")
815	      || !strcmp (secname, ".data.percpu")
816	      || !strncmp (secname, ".exit", 5))
817	    {
818	      *addr = (Dwarf_Addr) -1l;
819	      return DWARF_CB_OK;
820	    }
821
822	  /* The goofy PPC64 module_frob_arch_sections function tweaks
823	     the section names as a way to control other kernel code's
824	     behavior, and this cruft leaks out into the /sys information.
825	     The file name for ".init*" may actually look like "_init*".  */
826
827	  const bool is_init = !strncmp (secname, ".init", 5);
828	  if (is_init)
829	    {
830	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
831			    modname, &secname[1]) < 0)
832		return ENOMEM;
833	      f = fopen (sysfile, "r");
834	      free (sysfile);
835	      if (f != NULL)
836		goto ok;
837	    }
838
839	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
840	     In case that size increases in the future, look for longer
841	     truncated names first.  */
842	  size_t namelen = strlen (secname);
843	  if (namelen >= MODULE_SECT_NAME_LEN)
844	    {
845	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
846				  modname, secname);
847	      if (len < 0)
848		return DWARF_CB_ABORT;
849	      char *end = sysfile + len;
850	      do
851		{
852		  *--end = '\0';
853		  f = fopen (sysfile, "r");
854		  if (is_init && f == NULL && errno == ENOENT)
855		    {
856		      sysfile[len - namelen] = '_';
857		      f = fopen (sysfile, "r");
858		      sysfile[len - namelen] = '.';
859		    }
860		}
861	      while (f == NULL && errno == ENOENT
862		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
863	      free (sysfile);
864
865	      if (f != NULL)
866		goto ok;
867	    }
868	}
869
870      return DWARF_CB_ABORT;
871    }
872
873 ok:
874  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
875
876  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
877		: ferror_unlocked (f) ? errno : ENOEXEC);
878  fclose (f);
879
880  if (result == 0)
881    return DWARF_CB_OK;
882
883  errno = result;
884  return DWARF_CB_ABORT;
885}
886INTDEF (dwfl_linux_kernel_module_section_address)
887
888int
889dwfl_linux_kernel_report_modules (Dwfl *dwfl)
890{
891  FILE *f = fopen (MODULELIST, "r");
892  if (f == NULL)
893    return errno;
894
895  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
896
897  int result = 0;
898  Dwarf_Addr modaddr;
899  unsigned long int modsz;
900  char modname[128];
901  char *line = NULL;
902  size_t linesz = 0;
903  /* We can't just use fscanf here because it's not easy to distinguish \n
904     from other whitespace so as to take the optional word following the
905     address but always stop at the end of the line.  */
906  while (getline (&line, &linesz, f) > 0
907	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
908		    modname, &modsz, &modaddr) == 3)
909    {
910      Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
911						     modaddr, modaddr + modsz);
912      if (mod == NULL)
913	{
914	  result = -1;
915	  break;
916	}
917
918      result = check_module_notes (mod);
919    }
920  free (line);
921
922  if (result == 0)
923    result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
924
925  fclose (f);
926
927  return result;
928}
929INTDEF (dwfl_linux_kernel_report_modules)
930