linux-kernel-modules.c revision 251f1459aa54c55e2fe51f5499709253984b99a5
1/* Standard libdwfl callbacks for debugging the running Linux kernel.
2   Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29/* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
30   Everything we need here is fine if its declarations just come first.  */
31
32#include <fts.h>
33
34#include <config.h>
35
36#include "libdwflP.h"
37#include <inttypes.h>
38#include <errno.h>
39#include <stdio.h>
40#include <stdio_ext.h>
41#include <string.h>
42#include <stdlib.h>
43#include <sys/utsname.h>
44#include <fcntl.h>
45#include <unistd.h>
46
47
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME	"kernel"

/* printf format for a release's module directory; the single %s is
   filled in with the kernel release string.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* Files and printf formats naming the places in /sys and /proc that
   this file reads.  Each %s in a format is filled in with a module
   name.  */
#define KNOTESFILE	"/sys/kernel/notes"
#define	MODNOTESFMT	"/sys/module/%s/notes"
#define KSYMSFILE	"/proc/kallsyms"
#define MODULELIST	"/proc/modules"
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
58
59
/* File name suffixes that try_kernel_name appends to a kernel file name
   when the name cannot be opened as given.  The table only exists, and
   only contains an entry for, the USE_* macros that are defined.  */
#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
static const char *vmlinux_suffixes[] =
  {
#ifdef USE_ZLIB
    ".gz",
#endif
#ifdef USE_BZLIB
    ".bz2",
#endif
#ifdef USE_LZMA
    ".xz",
#endif
  };
#endif
74
75/* Try to open the given file as it is or under the debuginfo directory.  */
76static int
77try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
78{
79  if (*fname == NULL)
80    return -1;
81
82  /* Don't bother trying *FNAME itself here if the path will cause it to be
83     tried because we give its own basename as DEBUGLINK_FILE.  */
84  int fd = ((((dwfl->callbacks->debuginfo_path
85	       ? *dwfl->callbacks->debuginfo_path : NULL)
86	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
87	    : TEMP_FAILURE_RETRY (open64 (*fname, O_RDONLY)));
88
89  if (fd < 0)
90    {
91      Dwfl_Module fakemod = { .dwfl = dwfl };
92      /* First try the file's unadorned basename as DEBUGLINK_FILE,
93	 to look for "vmlinux" files.  */
94      fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
95						 *fname, basename (*fname), 0,
96						 &fakemod.debug.name);
97      if (fd < 0 && try_debug)
98	/* Next, let the call use the default of basename + ".debug",
99	   to look for "vmlinux.debug" files.  */
100	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
101						   *fname, NULL, 0,
102						   &fakemod.debug.name);
103      if (fakemod.debug.name != NULL)
104	{
105	  free (*fname);
106	  *fname = fakemod.debug.name;
107	}
108    }
109
110#if defined (USE_ZLIB) || defined (USE_BZLIB) || defined (USE_LZMA)
111  if (fd < 0)
112    for (size_t i = 0;
113	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
114	 ++i)
115      {
116	char *zname;
117	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
118	  {
119	    fd = TEMP_FAILURE_RETRY (open64 (zname, O_RDONLY));
120	    if (fd < 0)
121	      free (zname);
122	    else
123	      {
124		free (*fname);
125		*fname = zname;
126	      }
127	  }
128      }
129#endif
130
131  if (fd < 0)
132    {
133      free (*fname);
134      *fname = NULL;
135    }
136
137  return fd;
138}
139
/* Return the `uname -r` string for the running kernel, or NULL if uname
   fails.  The string lives in static storage and is only fetched while
   the cached copy is still empty.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached_uts;

  /* A non-empty release field means an earlier call already succeeded.  */
  if (cached_uts.release[0] != '\0')
    return cached_uts.release;

  if (uname (&cached_uts) != 0)
    return NULL;

  return cached_uts.release;
}
149
150static int
151find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
152{
153  if ((release[0] == '/'
154       ? asprintf (fname, "%s/vmlinux", release)
155       : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
156    return -1;
157
158  int fd = try_kernel_name (dwfl, fname, true);
159  if (fd < 0 && release[0] != '/')
160    {
161      free (*fname);
162      if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
163	return -1;
164      fd = try_kernel_name (dwfl, fname, true);
165    }
166
167  return fd;
168}
169
170static int
171get_release (Dwfl *dwfl, const char **release)
172{
173  if (dwfl == NULL)
174    return -1;
175
176  const char *release_string = release == NULL ? NULL : *release;
177  if (release_string == NULL)
178    {
179      release_string = kernel_release ();
180      if (release_string == NULL)
181	return errno;
182      if (release != NULL)
183	*release = release_string;
184    }
185
186  return 0;
187}
188
189static int
190report_kernel (Dwfl *dwfl, const char **release,
191	       int (*predicate) (const char *module, const char *file))
192{
193  int result = get_release (dwfl, release);
194  if (unlikely (result != 0))
195    return result;
196
197  char *fname;
198  int fd = find_kernel_elf (dwfl, *release, &fname);
199
200  if (fd < 0)
201    result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
202	      ? 0 : errno ?: ENOENT);
203  else
204    {
205      bool report = true;
206
207      if (predicate != NULL)
208	{
209	  /* Let the predicate decide whether to use this one.  */
210	  int want = (*predicate) (KERNEL_MODNAME, fname);
211	  if (want < 0)
212	    result = errno;
213	  report = want > 0;
214	}
215
216      if (report)
217	{
218	  /* Note that on some architectures (e.g. x86_64) the vmlinux
219	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
220	     In both cases the phdr p_vaddr load address will be non-zero.
221	     We want the image to be placed as if it was ET_DYN, so
222	     pass true for add_p_vaddr which will do the right thing
223	     (in combination with a zero base) in either case.  */
224	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
225						      fname, fd, 0, true);
226	  if (mod == NULL)
227	    result = -1;
228	  else
229	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
230	    mod->e_type = ET_DYN;
231	}
232
233      free (fname);
234
235      if (!report || result < 0)
236	close (fd);
237    }
238
239  return result;
240}
241
242/* Look for a kernel debug archive.  If we find one, report all its modules.
243   If not, return ENOENT.  */
244static int
245report_kernel_archive (Dwfl *dwfl, const char **release,
246		       int (*predicate) (const char *module, const char *file))
247{
248  int result = get_release (dwfl, release);
249  if (unlikely (result != 0))
250    return result;
251
252  char *archive;
253  int res = (((*release)[0] == '/')
254	     ? asprintf (&archive, "%s/debug.a", *release)
255	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
256  if (unlikely (res < 0))
257    return ENOMEM;
258
259  int fd = try_kernel_name (dwfl, &archive, false);
260  if (fd < 0)
261    result = errno ?: ENOENT;
262  else
263    {
264      /* We have the archive file open!  */
265      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
266						    true, predicate);
267      if (unlikely (last == NULL))
268	result = -1;
269      else
270	{
271	  /* Find the kernel and move it to the head of the list.  */
272	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
273	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
274	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
275	      {
276		*prevp = m->next;
277		m->next = *tailp;
278		*tailp = m;
279		break;
280	      }
281	}
282    }
283
284  free (archive);
285  return result;
286}
287
/* Check whether the file name in F ends in a kernel module suffix.

   If NAMELEN is nonzero, the name must consist of exactly NAMELEN
   characters followed by the suffix.  If NAMELEN is zero, any name
   longer than the suffix itself qualifies.

   Returns the length of the matching suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  /* Candidates in the order they are tried.  */
  static const char *const suffixes[] =
    {
      ".ko",
#if USE_ZLIB
      ".ko.gz",
#endif
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };

  for (size_t i = 0; i < sizeof suffixes / sizeof suffixes[0]; ++i)
    {
      const char *sfx = suffixes[i];
      const size_t sfxlen = strlen (sfx);

      if (namelen != 0)
        {
          if (f->fts_namelen != namelen + sfxlen)
            continue;
        }
      else if (f->fts_namelen <= sfxlen)
        continue;

      /* Compare the tail of the name, terminating null included.  */
      if (memcmp (f->fts_name + f->fts_namelen - sfxlen, sfx, sfxlen + 1) == 0)
        return sfxlen;
    }

  return 0;
}
313
314/* Report a kernel and all its modules found on disk, for offline use.
315   If RELEASE starts with '/', it names a directory to look in;
316   if not, it names a directory to find under /lib/modules/;
317   if null, /lib/modules/`uname -r` is used.
318   Returns zero on success, -1 if dwfl_report_module failed,
319   or an errno code if finding the files on disk failed.  */
320int
321dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
322				  int (*predicate) (const char *module,
323						    const char *file))
324{
325  int result = report_kernel_archive (dwfl, &release, predicate);
326  if (result != ENOENT)
327    return result;
328
329  /* First report the kernel.  */
330  result = report_kernel (dwfl, &release, predicate);
331  if (result == 0)
332    {
333      /* Do "find /lib/modules/RELEASE -name *.ko".  */
334
335      char *modulesdir[] = { NULL, NULL };
336      if (release[0] == '/')
337	modulesdir[0] = (char *) release;
338      else
339	{
340	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
341	    return errno;
342	}
343
344      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
345      if (modulesdir[0] == (char *) release)
346	modulesdir[0] = NULL;
347      if (fts == NULL)
348	{
349	  free (modulesdir[0]);
350	  return errno;
351	}
352
353      FTSENT *f;
354      while ((f = fts_read (fts)) != NULL)
355	{
356	  /* Skip a "source" subtree, which tends to be large.
357	     This insane hard-coding of names is what depmod does too.  */
358	  if (f->fts_namelen == sizeof "source" - 1
359	      && !strcmp (f->fts_name, "source"))
360	    {
361	      fts_set (fts, f, FTS_SKIP);
362	      continue;
363	    }
364
365	  switch (f->fts_info)
366	    {
367	    case FTS_F:
368	    case FTS_SL:
369	    case FTS_NSOK:;
370	      /* See if this file name matches "*.ko".  */
371	      const size_t suffix = check_suffix (f, 0);
372	      if (suffix)
373		{
374		  /* We have a .ko file to report.  Following the algorithm
375		     by which the kernel makefiles set KBUILD_MODNAME, we
376		     replace all ',' or '-' with '_' in the file name and
377		     call that the module name.  Modules could well be
378		     built using different embedded names than their file
379		     names.  To handle that, we would have to look at the
380		     __this_module.name contents in the module's text.  */
381
382		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
383		  if (unlikely (name == NULL))
384		    {
385		      __libdwfl_seterrno (DWFL_E_NOMEM);
386		      result = -1;
387		      break;
388		    }
389		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
390		    if (name[i] == '-' || name[i] == ',')
391		      name[i] = '_';
392
393		  if (predicate != NULL)
394		    {
395		      /* Let the predicate decide whether to use this one.  */
396		      int want = (*predicate) (name, f->fts_path);
397		      if (want < 0)
398			{
399			  result = -1;
400			  free (name);
401			  break;
402			}
403		      if (!want)
404			{
405			  free (name);
406			  continue;
407			}
408		    }
409
410		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
411		    {
412		      free (name);
413		      result = -1;
414		      break;
415		    }
416		  free (name);
417		}
418	      continue;
419
420	    case FTS_ERR:
421	    case FTS_DNR:
422	    case FTS_NS:
423	      result = f->fts_errno;
424	      break;
425
426	    case FTS_SLNONE:
427	    default:
428	      continue;
429	    }
430
431	  /* We only get here in error cases.  */
432	  break;
433	}
434      fts_close (fts);
435      free (modulesdir[0]);
436    }
437
438  return result;
439}
440INTDEF (dwfl_linux_kernel_report_offline)
441
442
443/* Grovel around to guess the bounds of the runtime kernel image.  */
444static int
445intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
446{
447  FILE *f = fopen (KSYMSFILE, "r");
448  if (f == NULL)
449    return errno;
450
451  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
452
453  *notes = 0;
454
455  char *line = NULL;
456  size_t linesz = 0;
457  size_t n;
458  char *p = NULL;
459  const char *type;
460
461  inline bool read_address (Dwarf_Addr *addr)
462  {
463    if ((n = getline (&line, &linesz, f)) < 1 || line[n - 2] == ']')
464      return false;
465    *addr = strtoull (line, &p, 16);
466    p += strspn (p, " \t");
467    type = strsep (&p, " \t\n");
468    if (type == NULL)
469      return false;
470    return p != NULL && p != line;
471  }
472
473  int result;
474  do
475    result = read_address (start) ? 0 : -1;
476  while (result == 0 && strchr ("TtRr", *type) == NULL);
477
478  if (result == 0)
479    {
480      *end = *start;
481      while (read_address (end))
482	if (*notes == 0 && !strcmp (p, "__start_notes\n"))
483	  *notes = *end;
484
485      Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
486      *start &= -(Dwarf_Addr) round_kernel;
487      *end += round_kernel - 1;
488      *end &= -(Dwarf_Addr) round_kernel;
489      if (*start >= *end || *end - *start < round_kernel)
490	result = -1;
491    }
492  free (line);
493
494  if (result == -1)
495    result = ferror_unlocked (f) ? errno : ENOEXEC;
496
497  fclose (f);
498
499  return result;
500}
501
502
503/* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
504static int
505check_notes (Dwfl_Module *mod, const char *notesfile,
506	     Dwarf_Addr vaddr, const char *secname)
507{
508  int fd = open64 (notesfile, O_RDONLY);
509  if (fd < 0)
510    return 1;
511
512  assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
513  assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
514  union
515  {
516    GElf_Nhdr nhdr;
517    unsigned char data[8192];
518  } buf;
519
520  ssize_t n = read (fd, buf.data, sizeof buf);
521  close (fd);
522
523  if (n <= 0)
524    return 1;
525
526  unsigned char *p = buf.data;
527  while (p < &buf.data[n])
528    {
529      /* No translation required since we are reading the native kernel.  */
530      GElf_Nhdr *nhdr = (void *) p;
531      p += sizeof *nhdr;
532      unsigned char *name = p;
533      p += (nhdr->n_namesz + 3) & -4U;
534      unsigned char *bits = p;
535      p += (nhdr->n_descsz + 3) & -4U;
536
537      if (p <= &buf.data[n]
538	  && nhdr->n_type == NT_GNU_BUILD_ID
539	  && nhdr->n_namesz == sizeof "GNU"
540	  && !memcmp (name, "GNU", sizeof "GNU"))
541	{
542	  /* Found it.  For a module we must figure out its VADDR now.  */
543
544	  if (secname != NULL
545	      && (INTUSE(dwfl_linux_kernel_module_section_address)
546		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
547		  || vaddr == (GElf_Addr) -1l))
548	    vaddr = 0;
549
550	  if (vaddr != 0)
551	    vaddr += bits - buf.data;
552	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
553						      nhdr->n_descsz, vaddr);
554	}
555    }
556
557  return 0;
558}
559
560/* Look for a build ID for the kernel.  */
561static int
562check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
563{
564  return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
565}
566
567/* Look for a build ID for a loaded kernel module.  */
568static int
569check_module_notes (Dwfl_Module *mod)
570{
571  char *dirs[2] = { NULL, NULL };
572  if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
573    return ENOMEM;
574
575  FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
576  if (fts == NULL)
577    {
578      free (dirs[0]);
579      return 0;
580    }
581
582  int result = 0;
583  FTSENT *f;
584  while ((f = fts_read (fts)) != NULL)
585    {
586      switch (f->fts_info)
587	{
588	case FTS_F:
589	case FTS_SL:
590	case FTS_NSOK:
591	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
592	  if (result > 0)	/* Nothing found.  */
593	    {
594	      result = 0;
595	      continue;
596	    }
597	  break;
598
599	case FTS_ERR:
600	case FTS_DNR:
601	  result = f->fts_errno;
602	  break;
603
604	case FTS_NS:
605	case FTS_SLNONE:
606	default:
607	  continue;
608	}
609
610      /* We only get here when finished or in error cases.  */
611      break;
612    }
613  fts_close (fts);
614  free (dirs[0]);
615
616  return result;
617}
618
619int
620dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
621{
622  Dwarf_Addr start;
623  Dwarf_Addr end;
624  inline Dwfl_Module *report (void)
625    {
626      return INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end);
627    }
628
629  /* This is a bit of a kludge.  If we already reported the kernel,
630     don't bother figuring it out again--it never changes.  */
631  for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
632    if (!strcmp (m->name, KERNEL_MODNAME))
633      {
634	start = m->low_addr;
635	end = m->high_addr;
636	return report () == NULL ? -1 : 0;
637      }
638
639  /* Try to figure out the bounds of the kernel image without
640     looking for any vmlinux file.  */
641  Dwarf_Addr notes;
642  /* The compiler cannot deduce that if intuit_kernel_bounds returns
643     zero NOTES will be initialized.  Fake the initialization.  */
644  asm ("" : "=m" (notes));
645  int result = intuit_kernel_bounds (&start, &end, &notes);
646  if (result == 0)
647    {
648      Dwfl_Module *mod = report ();
649      return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
650    }
651  if (result != ENOENT)
652    return result;
653
654  /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
655  return report_kernel (dwfl, NULL, NULL);
656}
657INTDEF (dwfl_linux_kernel_report_kernel)
658
659
660/* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
661
662int
663dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
664			    void **userdata __attribute__ ((unused)),
665			    const char *module_name,
666			    Dwarf_Addr base __attribute__ ((unused)),
667			    char **file_name, Elf **elfp)
668{
669  if (mod->build_id_len > 0)
670    {
671      int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
672					       file_name, elfp);
673      if (fd >= 0 || mod->main.elf != NULL || errno != 0)
674	return fd;
675    }
676
677  const char *release = kernel_release ();
678  if (release == NULL)
679    return errno;
680
681  if (!strcmp (module_name, KERNEL_MODNAME))
682    return find_kernel_elf (mod->dwfl, release, file_name);
683
684  /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
685
686  char *modulesdir[] = { NULL, NULL };
687  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
688    return -1;
689
690  FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
691  if (fts == NULL)
692    {
693      free (modulesdir[0]);
694      return -1;
695    }
696
697  size_t namelen = strlen (module_name);
698
699  /* This is a kludge.  There is no actual necessary relationship between
700     the name of the .ko file installed and the module name the kernel
701     knows it by when it's loaded.  The kernel's only idea of the module
702     name comes from the name embedded in the object's magic
703     .gnu.linkonce.this_module section.
704
705     In practice, these module names match the .ko file names except for
706     some using '_' and some using '-'.  So our cheap kludge is to look for
707     two files when either a '_' or '-' appears in a module name, one using
708     only '_' and one only using '-'.  */
709
710  char alternate_name[namelen + 1];
711  inline bool subst_name (char from, char to)
712    {
713      const char *n = memchr (module_name, from, namelen);
714      if (n == NULL)
715	return false;
716      char *a = mempcpy (alternate_name, module_name, n - module_name);
717      *a++ = to;
718      ++n;
719      const char *p;
720      while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
721	{
722	  a = mempcpy (a, n, p - n);
723	  *a++ = to;
724	  n = p + 1;
725	}
726      memcpy (a, n, namelen - (n - module_name) + 1);
727      return true;
728    }
729  if (!subst_name ('-', '_') && !subst_name ('_', '-'))
730    alternate_name[0] = '\0';
731
732  FTSENT *f;
733  int error = ENOENT;
734  while ((f = fts_read (fts)) != NULL)
735    {
736      /* Skip a "source" subtree, which tends to be large.
737	 This insane hard-coding of names is what depmod does too.  */
738      if (f->fts_namelen == sizeof "source" - 1
739	  && !strcmp (f->fts_name, "source"))
740	{
741	  fts_set (fts, f, FTS_SKIP);
742	  continue;
743	}
744
745      error = ENOENT;
746      switch (f->fts_info)
747	{
748	case FTS_F:
749	case FTS_SL:
750	case FTS_NSOK:
751	  /* See if this file name is "MODULE_NAME.ko".  */
752	  if (check_suffix (f, namelen)
753	      && (!memcmp (f->fts_name, module_name, namelen)
754		  || !memcmp (f->fts_name, alternate_name, namelen)))
755	    {
756	      int fd = open64 (f->fts_accpath, O_RDONLY);
757	      *file_name = strdup (f->fts_path);
758	      fts_close (fts);
759	      free (modulesdir[0]);
760	      if (fd < 0)
761		free (*file_name);
762	      else if (*file_name == NULL)
763		{
764		  close (fd);
765		  fd = -1;
766		}
767	      return fd;
768	    }
769	  break;
770
771	case FTS_ERR:
772	case FTS_DNR:
773	case FTS_NS:
774	  error = f->fts_errno;
775	  break;
776
777	case FTS_SLNONE:
778	default:
779	  break;
780	}
781    }
782
783  fts_close (fts);
784  free (modulesdir[0]);
785  errno = error;
786  return -1;
787}
788INTDEF (dwfl_linux_kernel_find_elf)
789
790
/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
   We read the information from /sys/module directly.

   The address of section SECNAME of module MODNAME is read, as a
   hexadecimal number, from the file SECNAME in the module's sections
   directory and stored in *ADDR.  If there is no such file, *ADDR is set
   to -1 for sections known never to be loaded, and otherwise a few
   alternative spellings of the file name are tried (see below).

   Returns DWARF_CB_OK on success.  On failure DWARF_CB_ABORT is
   returned, except that one allocation failure returns ENOMEM; when the
   file was opened but could not be parsed, errno is set to the read
   error or to ENOEXEC.  */

int
dwfl_linux_kernel_module_section_address
(Dwfl_Module *mod __attribute__ ((unused)),
 void **userdata __attribute__ ((unused)),
 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
 const GElf_Shdr *shdr __attribute__ ((unused)),
 Dwarf_Addr *addr)
{
  char *sysfile;
  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
    return DWARF_CB_ABORT;

  FILE *f = fopen (sysfile, "r");
  /* NOTE(review): errno from fopen is inspected below, after this free;
     that relies on free leaving errno untouched.  */
  free (sysfile);

  if (f == NULL)
    {
      if (errno == ENOENT)
	{
	  /* The .modinfo and .data.percpu sections are never kept
	     loaded in the kernel.  If the kernel was compiled without
	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
	     actually loaded at all.

	     Setting *ADDR to -1 tells the caller this section is
	     actually absent from memory.  */

	  if (!strcmp (secname, ".modinfo")
	      || !strcmp (secname, ".data.percpu")
	      || !strncmp (secname, ".exit", 5))
	    {
	      *addr = (Dwarf_Addr) -1l;
	      return DWARF_CB_OK;
	    }

	  /* The goofy PPC64 module_frob_arch_sections function tweaks
	     the section names as a way to control other kernel code's
	     behavior, and this cruft leaks out into the /sys information.
	     The file name for ".init*" may actually look like "_init*".  */

	  const bool is_init = !strncmp (secname, ".init", 5);
	  if (is_init)
	    {
	      /* Same name, with the leading '.' replaced by '_'.  */
	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
			    modname, &secname[1]) < 0)
		return ENOMEM;
	      f = fopen (sysfile, "r");
	      free (sysfile);
	      if (f != NULL)
		goto ok;
	    }

	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
	     In case that size increases in the future, look for longer
	     truncated names first.  */
	  size_t namelen = strlen (secname);
	  if (namelen >= MODULE_SECT_NAME_LEN)
	    {
	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
				  modname, secname);
	      if (len < 0)
		return DWARF_CB_ABORT;
	      /* END points just past the section name, which starts at
		 sysfile[len - namelen].  Each round chops one more
		 character off the end of the name and tries again, until
		 the name is down to MODULE_SECT_NAME_LEN - 1 characters.  */
	      char *end = sysfile + len;
	      do
		{
		  *--end = '\0';
		  f = fopen (sysfile, "r");
		  if (is_init && f == NULL && errno == ENOENT)
		    {
		      /* Try the "_init*" spelling of the truncated name
			 too, then restore the leading '.'.  */
		      sysfile[len - namelen] = '_';
		      f = fopen (sysfile, "r");
		      sysfile[len - namelen] = '.';
		    }
		}
	      while (f == NULL && errno == ENOENT
		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
	      free (sysfile);

	      if (f != NULL)
		goto ok;
	    }
	}

      return DWARF_CB_ABORT;
    }

 ok:
  /* F is open on the file holding the section's address.  */
  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);

  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
		: ferror_unlocked (f) ? errno : ENOEXEC);
  fclose (f);

  if (result == 0)
    return DWARF_CB_OK;

  errno = result;
  return DWARF_CB_ABORT;
}
INTDEF (dwfl_linux_kernel_module_section_address)
895
896int
897dwfl_linux_kernel_report_modules (Dwfl *dwfl)
898{
899  FILE *f = fopen (MODULELIST, "r");
900  if (f == NULL)
901    return errno;
902
903  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
904
905  int result = 0;
906  Dwarf_Addr modaddr;
907  unsigned long int modsz;
908  char modname[128];
909  char *line = NULL;
910  size_t linesz = 0;
911  /* We can't just use fscanf here because it's not easy to distinguish \n
912     from other whitespace so as to take the optional word following the
913     address but always stop at the end of the line.  */
914  while (getline (&line, &linesz, f) > 0
915	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
916		    modname, &modsz, &modaddr) == 3)
917    {
918      Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
919						     modaddr, modaddr + modsz);
920      if (mod == NULL)
921	{
922	  result = -1;
923	  break;
924	}
925
926      result = check_module_notes (mod);
927    }
928  free (line);
929
930  if (result == 0)
931    result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
932
933  fclose (f);
934
935  return result;
936}
937INTDEF (dwfl_linux_kernel_report_modules)
938