elf.c revision f0cb39bc6abe181a0abdd1f6c778521ae8497277
1
2/*--------------------------------------------------------------------*/
3/*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2010 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#if defined(VGO_linux)
32
33#include "pub_core_basics.h"
34#include "pub_core_vki.h"
35
36#include "pub_core_aspacemgr.h"     // various mapping fns
37#include "pub_core_debuglog.h"
38#include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39#include "pub_core_libcbase.h"      // VG_(memcmp), etc
40#include "pub_core_libcprint.h"
41#include "pub_core_libcfile.h"      // VG_(open) et al
42#include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43#include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44#include "pub_core_syscall.h"       // VG_(strerror)
45#include "pub_core_ume.h"           // self
46#include "pub_tool_libcproc.h"      // VG_(getenv)
47
48#include "priv_ume.h"
49
50/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
51#define _GNU_SOURCE
52#define _FILE_OFFSET_BITS 64
53/* This is for ELF types etc, and also the AT_ constants. */
54#include <elf.h>
55/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
56
57
58#if     VG_WORDSIZE == 8
59#define ESZ(x)  Elf64_##x
60#elif   VG_WORDSIZE == 4
61#define ESZ(x)  Elf32_##x
62#else
63#error VG_WORDSIZE needs to ==4 or ==8
64#endif
65
66struct elfinfo
67{
68   ESZ(Ehdr)    e;
69   ESZ(Phdr)    *p;
70   Int          fd;
71};
72
73static void check_mmap(SysRes res, Addr base, SizeT len)
74{
75   if (sr_isError(res)) {
76      VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
77                  "with error %lu (%s).\n",
78                  (ULong)base, (Long)len,
79                  sr_Err(res), VG_(strerror)(sr_Err(res)) );
80      if (sr_Err(res) == VKI_EINVAL) {
81         VG_(printf)("valgrind: this can be caused by executables with "
82                     "very large text, data or bss segments.\n");
83      }
84      VG_(exit)(1);
85   }
86}
87
88/*------------------------------------------------------------*/
89/*--- Loading ELF files                                    ---*/
90/*------------------------------------------------------------*/
91
92static
93struct elfinfo *readelf(Int fd, const char *filename)
94{
95   SysRes sres;
96   struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
97   Int phsz;
98
99   vg_assert(e);
100   e->fd = fd;
101
102   sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
103   if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
104      VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
105                  filename, VG_(strerror)(sr_Err(sres)));
106      goto bad;
107   }
108
109   if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
110      VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
111      goto bad;
112   }
113   if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
114      VG_(printf)("valgrind: wrong ELF executable class "
115                  "(eg. 32-bit instead of 64-bit)\n");
116      goto bad;
117   }
118   if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
119      VG_(printf)("valgrind: executable has wrong endian-ness\n");
120      goto bad;
121   }
122   if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
123      VG_(printf)("valgrind: this is not an executable\n");
124      goto bad;
125   }
126
127   if (e->e.e_machine != VG_ELF_MACHINE) {
128      VG_(printf)("valgrind: executable is not for "
129                  "this architecture\n");
130      goto bad;
131   }
132
133   if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
134      VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
135      goto bad;
136   }
137
138   phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
139   e->p = VG_(malloc)("ume.re.2", phsz);
140   vg_assert(e->p);
141
142   sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
143   if (sr_isError(sres) || sr_Res(sres) != phsz) {
144      VG_(printf)("valgrind: can't read phdr: %s\n",
145                  VG_(strerror)(sr_Err(sres)));
146      VG_(free)(e->p);
147      goto bad;
148   }
149
150   return e;
151
152  bad:
153   VG_(free)(e);
154   return NULL;
155}
156
157/* Map an ELF file.  Returns the brk address. */
158static
159ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
160{
161   Int    i;
162   SysRes res;
163   ESZ(Addr) elfbrk = 0;
164
165   for (i = 0; i < e->e.e_phnum; i++) {
166      ESZ(Phdr) *ph = &e->p[i];
167      ESZ(Addr) addr, brkaddr;
168      ESZ(Word) memsz;
169
170      if (ph->p_type != PT_LOAD)
171         continue;
172
173      addr    = ph->p_vaddr+base;
174      memsz   = ph->p_memsz;
175      brkaddr = addr+memsz;
176
177      if (brkaddr > elfbrk)
178         elfbrk = brkaddr;
179   }
180
181   for (i = 0; i < e->e.e_phnum; i++) {
182      ESZ(Phdr) *ph = &e->p[i];
183      ESZ(Addr) addr, bss, brkaddr;
184      ESZ(Off) off;
185      ESZ(Word) filesz;
186      ESZ(Word) memsz;
187      unsigned prot = 0;
188
189      if (ph->p_type != PT_LOAD)
190         continue;
191
192      if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
193      if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
194      if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
195
196      addr    = ph->p_vaddr+base;
197      off     = ph->p_offset;
198      filesz  = ph->p_filesz;
199      bss     = addr+filesz;
200      memsz   = ph->p_memsz;
201      brkaddr = addr+memsz;
202
203      // Tom says: In the following, do what the Linux kernel does and only
204      // map the pages that are required instead of rounding everything to
205      // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
206      // use ph->p_align -- part of stage2's memory gets trashed somehow.)
207      //
208      // The condition handles the case of a zero-length segment.
209      if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
210         if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
211         res = VG_(am_mmap_file_fixed_client)(
212                  VG_PGROUNDDN(addr),
213                  VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
214                  prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
215                  e->fd, VG_PGROUNDDN(off)
216               );
217         if (0) VG_(am_show_nsegments)(0,"after #1");
218         check_mmap(res, VG_PGROUNDDN(addr),
219                         VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
220      }
221
222      // if memsz > filesz, fill the remainder with zeroed pages
223      if (memsz > filesz) {
224         UInt bytes;
225
226         bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
227         if (bytes > 0) {
228            if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
229            res = VG_(am_mmap_anon_fixed_client)(
230                     VG_PGROUNDUP(bss), bytes,
231                     prot
232                  );
233            if (0) VG_(am_show_nsegments)(0,"after #2");
234            check_mmap(res, VG_PGROUNDUP(bss), bytes);
235         }
236
237         bytes = bss & (VKI_PAGE_SIZE - 1);
238
239         // The 'prot' condition allows for a read-only bss
240         if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
241            bytes = VKI_PAGE_SIZE - bytes;
242            VG_(memset)((char *)bss, 0, bytes);
243         }
244      }
245   }
246
247   return elfbrk;
248}
249
250Bool VG_(match_ELF)(Char *hdr, Int len)
251{
252   ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;
253   return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
254}
255
256
257/* load_ELF pulls an ELF executable into the address space, prepares
258   it for execution, and writes info about it into INFO.  In
259   particular it fills in .init_eip, which is the starting point.
260
261   Returns zero on success, non-zero (a VKI_E.. value) on failure.
262
263   The sequence of activities is roughly as follows:
264
265   - use readelf() to extract program header info from the exe file.
266
267   - scan the program header, collecting info (not sure what all those
268     info-> fields are, or whether they are used, but still) and in
269     particular looking out fo the PT_INTERP header, which describes
270     the interpreter.  If such a field is found, the space needed to
271     hold the interpreter is computed into interp_size.
272
273   - map the executable in, by calling mapelf().  This maps in all
274     loadable sections, and I _think_ also creates any .bss areas
275     required.  mapelf() returns the address just beyond the end of
276     the furthest-along mapping it creates.  The executable is mapped
277     starting at EBASE, which is usually read from it (eg, 0x8048000
278     etc) except if it's a PIE, in which case I'm not sure what
279     happens.
280
281     The returned address is recorded in info->brkbase as the start
282     point of the brk (data) segment, as it is traditional to place
283     the data segment just after the executable.  Neither load_ELF nor
284     mapelf creates the brk segment, though: that is for the caller of
285     load_ELF to attend to.
286
287   - If the initial phdr scan didn't find any mention of an
288     interpreter (interp == NULL), this must be a statically linked
289     executable, and we're pretty much done.
290
291   - Otherwise, we need to use mapelf() a second time to load the
292     interpreter.  The interpreter can go anywhere, but mapelf() wants
293     to be told a specific address to put it at.  So an advisory query
294     is passed to aspacem, asking where it would put an anonymous
295     client mapping of size INTERP_SIZE.  That address is then used
296     as the mapping address for the interpreter.
297
298   - The entry point in INFO is set to the interpreter's entry point,
299     and we're done.  */
300Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
301{
302   SysRes sres;
303   struct elfinfo *e;
304   struct elfinfo *interp = NULL;
305   ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
306   ESZ(Addr) maxaddr = 0;       /* highest mapped address */
307   ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
308   ESZ(Word) interp_size = 0;   /* interpreter size */
309   ESZ(Word) interp_align = VKI_PAGE_SIZE;
310   Int i;
311   void *entry;
312   ESZ(Addr) ebase = 0;
313
314   /* The difference between where the interpreter got mapped and
315      where it asked to be mapped.  Needed for computing the ppc64 ELF
316      entry point and initial tocptr (R2) value. */
317   ESZ(Word) interp_offset = 0;
318
319#ifdef HAVE_PIE
320   ebase = info->exe_base;
321#endif
322
323   e = readelf(fd, name);
324
325   if (e == NULL)
326      return VKI_ENOEXEC;
327
328   /* The kernel maps position-independent executables at TASK_SIZE*2/3;
329      duplicate this behavior as close as we can. */
330   if (e->e.e_type == ET_DYN && ebase == 0) {
331      ebase = VG_PGROUNDDN(info->exe_base
332                           + (info->exe_end - info->exe_base) * 2 / 3);
333      /* We really don't want to load PIEs at zero or too close.  It
334         works, but it's unrobust (NULL pointer reads and writes
335         become legit, which is really bad) and causes problems for
336         exp-ptrcheck, which assumes all numbers below 1MB are
337         nonpointers.  So, hackily, move it above 1MB. */
338      /* Later .. is appears ppc32-linux tries to put [vdso] at 1MB,
339         which totally screws things up, because nothing else can go
340         there.  So bump the hacky load addess along by 0x8000, to
341         0x108000. */
342      if (ebase < 0x108000)
343         ebase = 0x108000;
344   }
345
346   info->phnum = e->e.e_phnum;
347   info->entry = e->e.e_entry + ebase;
348   info->phdr = 0;
349
350   for (i = 0; i < e->e.e_phnum; i++) {
351      ESZ(Phdr) *ph = &e->p[i];
352
353      switch(ph->p_type) {
354      case PT_PHDR:
355         info->phdr = ph->p_vaddr + ebase;
356         break;
357
358      case PT_LOAD:
359         if (ph->p_vaddr < minaddr)
360            minaddr = ph->p_vaddr;
361         if (ph->p_vaddr+ph->p_memsz > maxaddr)
362            maxaddr = ph->p_vaddr+ph->p_memsz;
363         break;
364
365      case PT_INTERP: {
366         HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
367         Int j;
368         Int intfd;
369         Int baseaddr_set;
370
371         vg_assert(buf);
372         VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
373         buf[ph->p_filesz] = '\0';
374
375         sres = VG_(open)(buf, VKI_O_RDONLY, 0);
376         if (sr_isError(sres)) {
377            VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
378            VG_(exit)(1);
379         }
380         intfd = sr_Res(sres);
381
382         interp = readelf(intfd, buf);
383         if (interp == NULL) {
384            VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
385            return 1;
386         }
387         VG_(free)(buf);
388
389         baseaddr_set = 0;
390         for (j = 0; j < interp->e.e_phnum; j++) {
391            ESZ(Phdr) *iph = &interp->p[j];
392            ESZ(Addr) end;
393
394            if (iph->p_type != PT_LOAD)
395               continue;
396
397#ifdef ANDROID
398            // The first LOAD segment of /system/bin/linker has vaddr=0, memsz=0
399            // but subsequent segments start at 0xb0001000.
400            if (!baseaddr_set && iph->p_vaddr) {
401#else
402            if (!baseaddr_set) {
403#endif
404               interp_addr  = iph->p_vaddr;
405               interp_align = iph->p_align;
406               baseaddr_set = 1;
407            }
408
409            /* assumes that all segments in the interp are close */
410            end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
411
412            if (end > interp_size)
413               interp_size = end;
414         }
415         break;
416
417      default:
418         // do nothing
419         break;
420      }
421      }
422   }
423
424   if (info->phdr == 0)
425      info->phdr = minaddr + ebase + e->e.e_phoff;
426
427   if (info->exe_base != info->exe_end) {
428      if (minaddr >= maxaddr ||
429          (minaddr + ebase < info->exe_base ||
430           maxaddr + ebase > info->exe_end)) {
431         VG_(printf)("Executable range %p-%p is outside the\n"
432                     "acceptable range %p-%p\n",
433                     (char *)minaddr + ebase, (char *)maxaddr + ebase,
434                     (char *)info->exe_base,  (char *)info->exe_end);
435         return VKI_ENOMEM;
436      }
437   }
438
439   info->brkbase = mapelf(e, ebase);    /* map the executable */
440
441   if (info->brkbase == 0)
442      return VKI_ENOMEM;
443
444   if (interp != NULL) {
445      /* reserve a chunk of address space for interpreter */
446      MapRequest mreq;
447      Addr       advised;
448      Bool       ok;
449
450      /* Don't actually reserve the space.  Just get an advisory
451         indicating where it would be allocated, and pass that to
452         mapelf(), which in turn asks aspacem to do some fixed maps at
453         the specified address.  This is a bit of hack, but it should
454         work because there should be no intervening transactions with
455         aspacem which could cause those fixed maps to fail.
456
457         Placement policy is:
458
459         if the interpreter asks to be loaded at zero
460            ignore that and put it wherever we like (mappings at zero
461            are bad news)
462         else
463            try and put it where it asks for, but if that doesn't work,
464            just put it anywhere.
465      */
466      if (interp_addr == 0) {
467         mreq.rkind = MAny;
468         mreq.start = 0;
469         mreq.len   = interp_size;
470      } else {
471         mreq.rkind = MHint;
472         mreq.start = interp_addr;
473         mreq.len   = interp_size;
474      }
475
476      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
477
478      if (!ok) {
479         /* bomb out */
480         SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
481         if (0) VG_(printf)("reserve for interp: failed\n");
482         check_mmap(res, (Addr)interp_addr, interp_size);
483         /*NOTREACHED*/
484      }
485
486      (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
487
488      VG_(close)(interp->fd);
489
490      entry = (void *)(advised - interp_addr + interp->e.e_entry);
491      info->interp_base = (ESZ(Addr))advised;
492      interp_offset = advised - interp_addr;
493
494      VG_(free)(interp->p);
495      VG_(free)(interp);
496   } else {
497      Char *exit_if_static = VG_(getenv)("VALGRIND_EXIT_IF_STATIC");
498      if (exit_if_static && VG_(strcmp)(exit_if_static, "0") != 0) {
499        VG_(printf)("******* You are running Valgrind on a static binary: %s\n",
500                    name);
501        VG_(printf)("******* This is not supported, exiting\n");
502        VG_(exit)(1);
503      }
504      entry = (void *)(ebase + e->e.e_entry);
505   }
506
507   info->exe_base = minaddr + ebase;
508   info->exe_end  = maxaddr + ebase;
509
510#if defined(VGP_ppc64_linux)
511   /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
512      TOC entry contains three words; the first word is the function
513      address, the second word is the TOC ptr (r2), and the third word
514      is the static chain value. */
515   info->init_ip  = ((ULong*)entry)[0];
516   info->init_toc = ((ULong*)entry)[1];
517   info->init_ip  += interp_offset;
518   info->init_toc += interp_offset;
519#else
520   info->init_ip  = (Addr)entry;
521   info->init_toc = 0; /* meaningless on this platform */
522#endif
523   VG_(free)(e->p);
524   VG_(free)(e);
525
526   return 0;
527}
528
529#endif // defined(VGO_linux)
530
531/*--------------------------------------------------------------------*/
532/*--- end                                                          ---*/
533/*--------------------------------------------------------------------*/
534