1
/*--------------------------------------------------------------------*/
/*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2011 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
30
31#if defined(VGO_linux)
32
33#include "pub_core_basics.h"
34#include "pub_core_vki.h"
35
36#include "pub_core_aspacemgr.h"     // various mapping fns
37#include "pub_core_debuglog.h"
38#include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39#include "pub_core_libcbase.h"      // VG_(memcmp), etc
40#include "pub_core_libcprint.h"
41#include "pub_core_libcfile.h"      // VG_(open) et al
42#include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43#include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44#include "pub_core_syscall.h"       // VG_(strerror)
45#include "pub_core_ume.h"           // self
46#include "pub_tool_libcproc.h"      // VG_(getenv)
47
48#include "priv_ume.h"
49
50/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
51#define _GNU_SOURCE
52#define _FILE_OFFSET_BITS 64
53/* This is for ELF types etc, and also the AT_ constants. */
54#include <elf.h>
55/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
56
57
58#if     VG_WORDSIZE == 8
59#define ESZ(x)  Elf64_##x
60#elif   VG_WORDSIZE == 4
61#define ESZ(x)  Elf32_##x
62#else
63#error VG_WORDSIZE needs to ==4 or ==8
64#endif
65
66struct elfinfo
67{
68   ESZ(Ehdr)    e;
69   ESZ(Phdr)    *p;
70   Int          fd;
71};
72
73static void check_mmap(SysRes res, Addr base, SizeT len)
74{
75   if (sr_isError(res)) {
76      VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
77                  "with error %lu (%s).\n",
78                  (ULong)base, (Long)len,
79                  sr_Err(res), VG_(strerror)(sr_Err(res)) );
80      if (sr_Err(res) == VKI_EINVAL) {
81         VG_(printf)("valgrind: this can be caused by executables with "
82                     "very large text, data or bss segments.\n");
83      }
84      VG_(exit)(1);
85   }
86}
87
/*------------------------------------------------------------*/
/*--- Loading ELF files                                    ---*/
/*------------------------------------------------------------*/
91
92static
93struct elfinfo *readelf(Int fd, const char *filename)
94{
95   SysRes sres;
96   struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
97   Int phsz;
98
99   vg_assert(e);
100   e->fd = fd;
101
102   sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
103   if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
104      VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
105                  filename, VG_(strerror)(sr_Err(sres)));
106      goto bad;
107   }
108
109   if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
110      VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
111      goto bad;
112   }
113   if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
114      VG_(printf)("valgrind: wrong ELF executable class "
115                  "(eg. 32-bit instead of 64-bit)\n");
116      goto bad;
117   }
118   if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
119      VG_(printf)("valgrind: executable has wrong endian-ness\n");
120      goto bad;
121   }
122   if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
123      VG_(printf)("valgrind: this is not an executable\n");
124      goto bad;
125   }
126
127   if (e->e.e_machine != VG_ELF_MACHINE) {
128      VG_(printf)("valgrind: executable is not for "
129                  "this architecture\n");
130      goto bad;
131   }
132
133   if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
134      VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
135      goto bad;
136   }
137
138   phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
139   e->p = VG_(malloc)("ume.re.2", phsz);
140   vg_assert(e->p);
141
142   sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
143   if (sr_isError(sres) || sr_Res(sres) != phsz) {
144      VG_(printf)("valgrind: can't read phdr: %s\n",
145                  VG_(strerror)(sr_Err(sres)));
146      VG_(free)(e->p);
147      goto bad;
148   }
149
150   return e;
151
152  bad:
153   VG_(free)(e);
154   return NULL;
155}
156
157/* Map an ELF file.  Returns the brk address. */
158static
159ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
160{
161   Int    i;
162   SysRes res;
163   ESZ(Addr) elfbrk = 0;
164
165   for (i = 0; i < e->e.e_phnum; i++) {
166      ESZ(Phdr) *ph = &e->p[i];
167      ESZ(Addr) addr, brkaddr;
168      ESZ(Word) memsz;
169
170      if (ph->p_type != PT_LOAD)
171         continue;
172
173      addr    = ph->p_vaddr+base;
174      memsz   = ph->p_memsz;
175      brkaddr = addr+memsz;
176
177      if (brkaddr > elfbrk)
178         elfbrk = brkaddr;
179   }
180
181   for (i = 0; i < e->e.e_phnum; i++) {
182      ESZ(Phdr) *ph = &e->p[i];
183      ESZ(Addr) addr, bss, brkaddr;
184      ESZ(Off) off;
185      ESZ(Word) filesz;
186      ESZ(Word) memsz;
187      unsigned prot = 0;
188
189      if (ph->p_type != PT_LOAD)
190         continue;
191
192      if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
193      if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
194      if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
195
196      addr    = ph->p_vaddr+base;
197      off     = ph->p_offset;
198      filesz  = ph->p_filesz;
199      bss     = addr+filesz;
200      memsz   = ph->p_memsz;
201      brkaddr = addr+memsz;
202
203      // Tom says: In the following, do what the Linux kernel does and only
204      // map the pages that are required instead of rounding everything to
205      // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
206      // use ph->p_align -- part of stage2's memory gets trashed somehow.)
207      //
208      // The condition handles the case of a zero-length segment.
209      if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
210         if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
211         res = VG_(am_mmap_file_fixed_client)(
212                  VG_PGROUNDDN(addr),
213                  VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
214                  prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
215                  e->fd, VG_PGROUNDDN(off)
216               );
217         if (0) VG_(am_show_nsegments)(0,"after #1");
218         check_mmap(res, VG_PGROUNDDN(addr),
219                         VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
220      }
221
222      // if memsz > filesz, fill the remainder with zeroed pages
223      if (memsz > filesz) {
224         UInt bytes;
225
226         bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
227         if (bytes > 0) {
228            if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
229            res = VG_(am_mmap_anon_fixed_client)(
230                     VG_PGROUNDUP(bss), bytes,
231                     prot
232                  );
233            if (0) VG_(am_show_nsegments)(0,"after #2");
234            check_mmap(res, VG_PGROUNDUP(bss), bytes);
235         }
236
237         bytes = bss & (VKI_PAGE_SIZE - 1);
238
239         // The 'prot' condition allows for a read-only bss
240         if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
241            bytes = VKI_PAGE_SIZE - bytes;
242            VG_(memset)((char *)bss, 0, bytes);
243         }
244      }
245   }
246
247   return elfbrk;
248}
249
250Bool VG_(match_ELF)(Char *hdr, Int len)
251{
252   ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;
253   return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
254}
255
256
257/* load_ELF pulls an ELF executable into the address space, prepares
258   it for execution, and writes info about it into INFO.  In
259   particular it fills in .init_eip, which is the starting point.
260
261   Returns zero on success, non-zero (a VKI_E.. value) on failure.
262
263   The sequence of activities is roughly as follows:
264
265   - use readelf() to extract program header info from the exe file.
266
267   - scan the program header, collecting info (not sure what all those
268     info-> fields are, or whether they are used, but still) and in
269     particular looking out fo the PT_INTERP header, which describes
270     the interpreter.  If such a field is found, the space needed to
271     hold the interpreter is computed into interp_size.
272
273   - map the executable in, by calling mapelf().  This maps in all
274     loadable sections, and I _think_ also creates any .bss areas
275     required.  mapelf() returns the address just beyond the end of
276     the furthest-along mapping it creates.  The executable is mapped
277     starting at EBASE, which is usually read from it (eg, 0x8048000
278     etc) except if it's a PIE, in which case I'm not sure what
279     happens.
280
281     The returned address is recorded in info->brkbase as the start
282     point of the brk (data) segment, as it is traditional to place
283     the data segment just after the executable.  Neither load_ELF nor
284     mapelf creates the brk segment, though: that is for the caller of
285     load_ELF to attend to.
286
287   - If the initial phdr scan didn't find any mention of an
288     interpreter (interp == NULL), this must be a statically linked
289     executable, and we're pretty much done.
290
291   - Otherwise, we need to use mapelf() a second time to load the
292     interpreter.  The interpreter can go anywhere, but mapelf() wants
293     to be told a specific address to put it at.  So an advisory query
294     is passed to aspacem, asking where it would put an anonymous
295     client mapping of size INTERP_SIZE.  That address is then used
296     as the mapping address for the interpreter.
297
298   - The entry point in INFO is set to the interpreter's entry point,
299     and we're done.  */
300Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
301{
302   SysRes sres;
303   struct elfinfo *e;
304   struct elfinfo *interp = NULL;
305   ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
306   ESZ(Addr) maxaddr = 0;       /* highest mapped address */
307   ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
308   ESZ(Word) interp_size = 0;   /* interpreter size */
309   /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
310   Int i;
311   void *entry;
312   ESZ(Addr) ebase = 0;
313
314   /* The difference between where the interpreter got mapped and
315      where it asked to be mapped.  Needed for computing the ppc64 ELF
316      entry point and initial tocptr (R2) value. */
317   ESZ(Word) interp_offset = 0;
318
319#ifdef HAVE_PIE
320   ebase = info->exe_base;
321#endif
322
323   e = readelf(fd, name);
324
325   if (e == NULL)
326      return VKI_ENOEXEC;
327
328   /* The kernel maps position-independent executables at TASK_SIZE*2/3;
329      duplicate this behavior as close as we can. */
330   if (e->e.e_type == ET_DYN && ebase == 0) {
331      ebase = VG_PGROUNDDN(info->exe_base
332                           + (info->exe_end - info->exe_base) * 2 / 3);
333      /* We really don't want to load PIEs at zero or too close.  It
334         works, but it's unrobust (NULL pointer reads and writes
335         become legit, which is really bad) and causes problems for
336         exp-ptrcheck, which assumes all numbers below 1MB are
337         nonpointers.  So, hackily, move it above 1MB. */
338      /* Later .. is appears ppc32-linux tries to put [vdso] at 1MB,
339         which totally screws things up, because nothing else can go
340         there.  So bump the hacky load addess along by 0x8000, to
341         0x108000. */
342      if (ebase < 0x108000)
343         ebase = 0x108000;
344   }
345
346   info->phnum = e->e.e_phnum;
347   info->entry = e->e.e_entry + ebase;
348   info->phdr = 0;
349
350   for (i = 0; i < e->e.e_phnum; i++) {
351      ESZ(Phdr) *ph = &e->p[i];
352
353      switch(ph->p_type) {
354      case PT_PHDR:
355         info->phdr = ph->p_vaddr + ebase;
356         break;
357
358      case PT_LOAD:
359         if (ph->p_vaddr < minaddr)
360            minaddr = ph->p_vaddr;
361         if (ph->p_vaddr+ph->p_memsz > maxaddr)
362            maxaddr = ph->p_vaddr+ph->p_memsz;
363         break;
364
365      case PT_INTERP: {
366         HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
367         Int j;
368         Int intfd;
369         Int baseaddr_set;
370
371         vg_assert(buf);
372         VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
373         buf[ph->p_filesz] = '\0';
374
375         sres = VG_(open)(buf, VKI_O_RDONLY, 0);
376         if (sr_isError(sres)) {
377            VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
378            VG_(exit)(1);
379         }
380         intfd = sr_Res(sres);
381
382         interp = readelf(intfd, buf);
383         if (interp == NULL) {
384            VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
385            return 1;
386         }
387         VG_(free)(buf);
388
389         baseaddr_set = 0;
390         for (j = 0; j < interp->e.e_phnum; j++) {
391            ESZ(Phdr) *iph = &interp->p[j];
392            ESZ(Addr) end;
393
394            if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
395               continue;
396
397#ifdef ANDROID
398            // On older versions of Android, the first LOAD segment of
399            // /system/bin/linker has vaddr=0, memsz=0, but subsequent
400            // segments start at 0xb0001000.
401            //
402            // On newer versions of Android, the linker is ET_DYN and
403            // we don't have to worry about iph->p_vaddr
404            if (!baseaddr_set
405                && (iph->p_vaddr || (interp->e.e_type == ET_DYN))) {
406#else
407            if (!baseaddr_set) {
408#endif
409               interp_addr  = iph->p_vaddr;
410               /* interp_align = iph->p_align; */ /* UNUSED */
411               baseaddr_set = 1;
412            }
413
414            /* assumes that all segments in the interp are close */
415            end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
416
417            if (end > interp_size)
418               interp_size = end;
419         }
420         break;
421
422      default:
423         // do nothing
424         break;
425      }
426      }
427   }
428
429   if (info->phdr == 0)
430      info->phdr = minaddr + ebase + e->e.e_phoff;
431
432   if (info->exe_base != info->exe_end) {
433      if (minaddr >= maxaddr ||
434          (minaddr + ebase < info->exe_base ||
435           maxaddr + ebase > info->exe_end)) {
436         VG_(printf)("Executable range %p-%p is outside the\n"
437                     "acceptable range %p-%p\n",
438                     (char *)minaddr + ebase, (char *)maxaddr + ebase,
439                     (char *)info->exe_base,  (char *)info->exe_end);
440         return VKI_ENOMEM;
441      }
442   }
443
444   info->brkbase = mapelf(e, ebase);    /* map the executable */
445
446   if (info->brkbase == 0)
447      return VKI_ENOMEM;
448
449   if (interp != NULL) {
450      /* reserve a chunk of address space for interpreter */
451      MapRequest mreq;
452      Addr       advised;
453      Bool       ok;
454
455      /* Don't actually reserve the space.  Just get an advisory
456         indicating where it would be allocated, and pass that to
457         mapelf(), which in turn asks aspacem to do some fixed maps at
458         the specified address.  This is a bit of hack, but it should
459         work because there should be no intervening transactions with
460         aspacem which could cause those fixed maps to fail.
461
462         Placement policy is:
463
464         if the interpreter asks to be loaded at zero
465            ignore that and put it wherever we like (mappings at zero
466            are bad news)
467         else
468            try and put it where it asks for, but if that doesn't work,
469            just put it anywhere.
470      */
471      if (interp_addr == 0) {
472         mreq.rkind = MAny;
473         mreq.start = 0;
474         mreq.len   = interp_size;
475      } else {
476         mreq.rkind = MHint;
477         mreq.start = interp_addr;
478         mreq.len   = interp_size;
479      }
480
481      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
482
483      if (!ok) {
484         /* bomb out */
485         SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
486         if (0) VG_(printf)("reserve for interp: failed\n");
487         check_mmap(res, (Addr)interp_addr, interp_size);
488         /*NOTREACHED*/
489      }
490
491      (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
492
493      VG_(close)(interp->fd);
494
495      entry = (void *)(advised - interp_addr + interp->e.e_entry);
496      info->interp_base = (ESZ(Addr))advised;
497      interp_offset = advised - interp_addr;
498
499      VG_(free)(interp->p);
500      VG_(free)(interp);
501   } else {
502      Char *exit_if_static = VG_(getenv)("VALGRIND_EXIT_IF_STATIC");
503      if (exit_if_static && VG_(strcmp)(exit_if_static, "0") != 0) {
504        VG_(printf)("******* You are running Valgrind on a static binary: %s\n",
505                    name);
506        VG_(printf)("******* This is not supported, exiting\n");
507        VG_(exit)(1);
508      }
509      entry = (void *)(ebase + e->e.e_entry);
510   }
511
512   info->exe_base = minaddr + ebase;
513   info->exe_end  = maxaddr + ebase;
514
515#if defined(VGP_ppc64_linux)
516   /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
517      TOC entry contains three words; the first word is the function
518      address, the second word is the TOC ptr (r2), and the third word
519      is the static chain value. */
520   info->init_ip  = ((ULong*)entry)[0];
521   info->init_toc = ((ULong*)entry)[1];
522   info->init_ip  += interp_offset;
523   info->init_toc += interp_offset;
524#else
525   info->init_ip  = (Addr)entry;
526   info->init_toc = 0; /* meaningless on this platform */
527   (void) interp_offset; /* stop gcc complaining it is unused */
528#endif
529   VG_(free)(e->p);
530   VG_(free)(e);
531
532   return 0;
533}
534
535#endif // defined(VGO_linux)
536
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
540