1
2/*--------------------------------------------------------------------*/
3/*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2013 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#if defined(VGO_linux)
32
33#include "pub_core_basics.h"
34#include "pub_core_vki.h"
35
36#include "pub_core_aspacemgr.h"     // various mapping fns
37#include "pub_core_debuglog.h"
38#include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39#include "pub_core_libcbase.h"      // VG_(memcmp), etc
40#include "pub_core_libcprint.h"
41#include "pub_core_libcfile.h"      // VG_(open) et al
42#include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43#include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44#include "pub_core_syscall.h"       // VG_(strerror)
45#include "pub_core_ume.h"           // self
46
47#include "priv_ume.h"
48
49/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50#define _GNU_SOURCE
51#define _FILE_OFFSET_BITS 64
52/* This is for ELF types etc, and also the AT_ constants. */
53#include <elf.h>
54/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
55
56
57#if     VG_WORDSIZE == 8
58#define ESZ(x)  Elf64_##x
59#elif   VG_WORDSIZE == 4
60#define ESZ(x)  Elf32_##x
61#else
62#error VG_WORDSIZE needs to ==4 or ==8
63#endif
64
65struct elfinfo
66{
67   ESZ(Ehdr)    e;
68   ESZ(Phdr)    *p;
69   Int          fd;
70};
71
72static void check_mmap(SysRes res, Addr base, SizeT len)
73{
74   if (sr_isError(res)) {
75      VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
76                  "with error %lu (%s).\n",
77                  (ULong)base, (Long)len,
78                  sr_Err(res), VG_(strerror)(sr_Err(res)) );
79      if (sr_Err(res) == VKI_EINVAL) {
80         VG_(printf)("valgrind: this can be caused by executables with "
81                     "very large text, data or bss segments.\n");
82      }
83      VG_(exit)(1);
84   }
85}
86
87/*------------------------------------------------------------*/
88/*--- Loading ELF files                                    ---*/
89/*------------------------------------------------------------*/
90
91static
92struct elfinfo *readelf(Int fd, const HChar *filename)
93{
94   SysRes sres;
95   struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
96   Int phsz;
97
98   vg_assert(e);
99   e->fd = fd;
100
101   sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
102   if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
103      VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
104                  filename, VG_(strerror)(sr_Err(sres)));
105      goto bad;
106   }
107
108   if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
109      VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
110      goto bad;
111   }
112   if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
113      VG_(printf)("valgrind: wrong ELF executable class "
114                  "(eg. 32-bit instead of 64-bit)\n");
115      goto bad;
116   }
117   if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
118      VG_(printf)("valgrind: executable has wrong endian-ness\n");
119      goto bad;
120   }
121   if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
122      VG_(printf)("valgrind: this is not an executable\n");
123      goto bad;
124   }
125
126   if (e->e.e_machine != VG_ELF_MACHINE) {
127      VG_(printf)("valgrind: executable is not for "
128                  "this architecture\n");
129      goto bad;
130   }
131
132   if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
133      VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
134      goto bad;
135   }
136
137   phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
138   e->p = VG_(malloc)("ume.re.2", phsz);
139   vg_assert(e->p);
140
141   sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
142   if (sr_isError(sres) || sr_Res(sres) != phsz) {
143      VG_(printf)("valgrind: can't read phdr: %s\n",
144                  VG_(strerror)(sr_Err(sres)));
145      VG_(free)(e->p);
146      goto bad;
147   }
148
149   return e;
150
151  bad:
152   VG_(free)(e);
153   return NULL;
154}
155
156/* Map an ELF file.  Returns the brk address. */
157static
158ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
159{
160   Int    i;
161   SysRes res;
162   ESZ(Addr) elfbrk = 0;
163
164   for (i = 0; i < e->e.e_phnum; i++) {
165      ESZ(Phdr) *ph = &e->p[i];
166      ESZ(Addr) addr, brkaddr;
167      ESZ(Word) memsz;
168
169      if (ph->p_type != PT_LOAD)
170         continue;
171
172      addr    = ph->p_vaddr+base;
173      memsz   = ph->p_memsz;
174      brkaddr = addr+memsz;
175
176      if (brkaddr > elfbrk)
177         elfbrk = brkaddr;
178   }
179
180   for (i = 0; i < e->e.e_phnum; i++) {
181      ESZ(Phdr) *ph = &e->p[i];
182      ESZ(Addr) addr, bss, brkaddr;
183      ESZ(Off) off;
184      ESZ(Word) filesz;
185      ESZ(Word) memsz;
186      unsigned prot = 0;
187
188      if (ph->p_type != PT_LOAD)
189         continue;
190
191      if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
192      if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
193      if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
194
195      addr    = ph->p_vaddr+base;
196      off     = ph->p_offset;
197      filesz  = ph->p_filesz;
198      bss     = addr+filesz;
199      memsz   = ph->p_memsz;
200      brkaddr = addr+memsz;
201
202      // Tom says: In the following, do what the Linux kernel does and only
203      // map the pages that are required instead of rounding everything to
204      // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
205      // use ph->p_align -- part of stage2's memory gets trashed somehow.)
206      //
207      // The condition handles the case of a zero-length segment.
208      if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
209         if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
210         res = VG_(am_mmap_file_fixed_client)(
211                  VG_PGROUNDDN(addr),
212                  VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
213                  prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
214                  e->fd, VG_PGROUNDDN(off)
215               );
216         if (0) VG_(am_show_nsegments)(0,"after #1");
217         check_mmap(res, VG_PGROUNDDN(addr),
218                         VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
219      }
220
221      // if memsz > filesz, fill the remainder with zeroed pages
222      if (memsz > filesz) {
223         UInt bytes;
224
225         bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
226         if (bytes > 0) {
227            if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
228            res = VG_(am_mmap_anon_fixed_client)(
229                     VG_PGROUNDUP(bss), bytes,
230                     prot
231                  );
232            if (0) VG_(am_show_nsegments)(0,"after #2");
233            check_mmap(res, VG_PGROUNDUP(bss), bytes);
234         }
235
236         bytes = bss & (VKI_PAGE_SIZE - 1);
237
238         // The 'prot' condition allows for a read-only bss
239         if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
240            bytes = VKI_PAGE_SIZE - bytes;
241            VG_(memset)((void *)bss, 0, bytes);
242         }
243      }
244   }
245
246   return elfbrk;
247}
248
249Bool VG_(match_ELF)(const void *hdr, Int len)
250{
251   const ESZ(Ehdr) *e = hdr;
252   return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
253}
254
255
256/* load_ELF pulls an ELF executable into the address space, prepares
257   it for execution, and writes info about it into INFO.  In
258   particular it fills in .init_eip, which is the starting point.
259
260   Returns zero on success, non-zero (a VKI_E.. value) on failure.
261
262   The sequence of activities is roughly as follows:
263
264   - use readelf() to extract program header info from the exe file.
265
266   - scan the program header, collecting info (not sure what all those
267     info-> fields are, or whether they are used, but still) and in
268     particular looking out fo the PT_INTERP header, which describes
269     the interpreter.  If such a field is found, the space needed to
270     hold the interpreter is computed into interp_size.
271
272   - map the executable in, by calling mapelf().  This maps in all
273     loadable sections, and I _think_ also creates any .bss areas
274     required.  mapelf() returns the address just beyond the end of
275     the furthest-along mapping it creates.  The executable is mapped
276     starting at EBASE, which is usually read from it (eg, 0x8048000
277     etc) except if it's a PIE, in which case I'm not sure what
278     happens.
279
280     The returned address is recorded in info->brkbase as the start
281     point of the brk (data) segment, as it is traditional to place
282     the data segment just after the executable.  Neither load_ELF nor
283     mapelf creates the brk segment, though: that is for the caller of
284     load_ELF to attend to.
285
286   - If the initial phdr scan didn't find any mention of an
287     interpreter (interp == NULL), this must be a statically linked
288     executable, and we're pretty much done.
289
290   - Otherwise, we need to use mapelf() a second time to load the
291     interpreter.  The interpreter can go anywhere, but mapelf() wants
292     to be told a specific address to put it at.  So an advisory query
293     is passed to aspacem, asking where it would put an anonymous
294     client mapping of size INTERP_SIZE.  That address is then used
295     as the mapping address for the interpreter.
296
297   - The entry point in INFO is set to the interpreter's entry point,
298     and we're done.  */
299Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
300{
301   SysRes sres;
302   struct elfinfo *e;
303   struct elfinfo *interp = NULL;
304   ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
305   ESZ(Addr) maxaddr = 0;       /* highest mapped address */
306   ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
307   ESZ(Word) interp_size = 0;   /* interpreter size */
308   /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
309   Int i;
310   void *entry;
311   ESZ(Addr) ebase = 0;
312
313#  if defined(HAVE_PIE)
314   ebase = info->exe_base;
315#  endif
316
317   e = readelf(fd, name);
318
319   if (e == NULL)
320      return VKI_ENOEXEC;
321
322   /* The kernel maps position-independent executables at TASK_SIZE*2/3;
323      duplicate this behavior as close as we can. */
324   if (e->e.e_type == ET_DYN && ebase == 0) {
325      ebase = VG_PGROUNDDN(info->exe_base
326                           + (info->exe_end - info->exe_base) * 2 / 3);
327      /* We really don't want to load PIEs at zero or too close.  It
328         works, but it's unrobust (NULL pointer reads and writes
329         become legit, which is really bad) and causes problems for
330         exp-ptrcheck, which assumes all numbers below 1MB are
331         nonpointers.  So, hackily, move it above 1MB. */
332      /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
333         which totally screws things up, because nothing else can go
334         there.  The size of [vdso] is around 2 or 3 pages, so bump
335         the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
336      /* Later .. on mips64 we can't use 0x108000, because mapelf will
337         fail. */
338#     if defined(VGP_mips64_linux)
339      if (ebase < 0x100000)
340         ebase = 0x100000;
341#     else
342      vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
343      ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
344      if (ebase < hacky_load_address)
345         ebase = hacky_load_address;
346#     endif
347   }
348
349   info->phnum = e->e.e_phnum;
350   info->entry = e->e.e_entry + ebase;
351   info->phdr = 0;
352   info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
353
354   for (i = 0; i < e->e.e_phnum; i++) {
355      ESZ(Phdr) *ph = &e->p[i];
356
357      switch(ph->p_type) {
358      case PT_PHDR:
359         info->phdr = ph->p_vaddr + ebase;
360         break;
361
362      case PT_LOAD:
363         if (ph->p_vaddr < minaddr)
364            minaddr = ph->p_vaddr;
365         if (ph->p_vaddr+ph->p_memsz > maxaddr)
366            maxaddr = ph->p_vaddr+ph->p_memsz;
367         break;
368
369      case PT_INTERP: {
370         HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
371         Int j;
372         Int intfd;
373         Int baseaddr_set;
374
375         vg_assert(buf);
376         VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
377         buf[ph->p_filesz] = '\0';
378
379         sres = VG_(open)(buf, VKI_O_RDONLY, 0);
380         if (sr_isError(sres)) {
381            VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
382            VG_(exit)(1);
383         }
384         intfd = sr_Res(sres);
385
386         interp = readelf(intfd, buf);
387         if (interp == NULL) {
388            VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
389            return 1;
390         }
391         VG_(free)(buf);
392
393         baseaddr_set = 0;
394         for (j = 0; j < interp->e.e_phnum; j++) {
395            ESZ(Phdr) *iph = &interp->p[j];
396            ESZ(Addr) end;
397
398            if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
399               continue;
400
401            if (!baseaddr_set) {
402               interp_addr  = iph->p_vaddr;
403               /* interp_align = iph->p_align; */ /* UNUSED */
404               baseaddr_set = 1;
405            }
406
407            /* assumes that all segments in the interp are close */
408            end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
409
410            if (end > interp_size)
411               interp_size = end;
412         }
413         break;
414
415#     if defined(PT_GNU_STACK)
416      /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
417      case PT_GNU_STACK:
418         if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
419         if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
420         if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
421         break;
422#     endif
423
424      default:
425         // do nothing
426         break;
427      }
428      }
429   }
430
431   if (info->phdr == 0)
432      info->phdr = minaddr + ebase + e->e.e_phoff;
433
434   if (info->exe_base != info->exe_end) {
435      if (minaddr >= maxaddr ||
436          (minaddr + ebase < info->exe_base ||
437           maxaddr + ebase > info->exe_end)) {
438         VG_(printf)("Executable range %p-%p is outside the\n"
439                     "acceptable range %p-%p\n",
440                     (char *)minaddr + ebase, (char *)maxaddr + ebase,
441                     (char *)info->exe_base,  (char *)info->exe_end);
442         return VKI_ENOMEM;
443      }
444   }
445
446   info->brkbase = mapelf(e, ebase);    /* map the executable */
447
448   if (info->brkbase == 0)
449      return VKI_ENOMEM;
450
451   if (interp != NULL) {
452      /* reserve a chunk of address space for interpreter */
453      MapRequest mreq;
454      Addr       advised;
455      Bool       ok;
456
457      /* Don't actually reserve the space.  Just get an advisory
458         indicating where it would be allocated, and pass that to
459         mapelf(), which in turn asks aspacem to do some fixed maps at
460         the specified address.  This is a bit of hack, but it should
461         work because there should be no intervening transactions with
462         aspacem which could cause those fixed maps to fail.
463
464         Placement policy is:
465
466         if the interpreter asks to be loaded at zero
467            ignore that and put it wherever we like (mappings at zero
468            are bad news)
469         else
470            try and put it where it asks for, but if that doesn't work,
471            just put it anywhere.
472      */
473      if (interp_addr == 0) {
474         mreq.rkind = MAny;
475         mreq.start = 0;
476         mreq.len   = interp_size;
477      } else {
478         mreq.rkind = MHint;
479         mreq.start = interp_addr;
480         mreq.len   = interp_size;
481      }
482
483      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
484
485      if (!ok) {
486         /* bomb out */
487         SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
488         if (0) VG_(printf)("reserve for interp: failed\n");
489         check_mmap(res, (Addr)interp_addr, interp_size);
490         /*NOTREACHED*/
491      }
492
493      (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
494
495      VG_(close)(interp->fd);
496
497      entry = (void *)(advised - interp_addr + interp->e.e_entry);
498      info->interp_offset = advised - interp_addr;
499
500      VG_(free)(interp->p);
501      VG_(free)(interp);
502   } else
503      entry = (void *)(ebase + e->e.e_entry);
504
505   info->exe_base = minaddr + ebase;
506   info->exe_end  = maxaddr + ebase;
507
508#if defined(VGP_ppc64_linux)
509   /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
510      TOC entry contains three words; the first word is the function
511      address, the second word is the TOC ptr (r2), and the third word
512      is the static chain value. */
513   info->init_ip  = ((ULong*)entry)[0];
514   info->init_toc = ((ULong*)entry)[1];
515   info->init_ip  += info->interp_offset;
516   info->init_toc += info->interp_offset;
517#else
518   info->init_ip  = (Addr)entry;
519   info->init_toc = 0; /* meaningless on this platform */
520#endif
521   VG_(free)(e->p);
522   VG_(free)(e);
523
524   return 0;
525}
526
527#endif // defined(VGO_linux)
528
529/*--------------------------------------------------------------------*/
530/*--- end                                                          ---*/
531/*--------------------------------------------------------------------*/
532