1// Copyright (c) 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Paul Pluzhnikov
32//
33// Allow dynamic symbol lookup in an in-memory Elf image.
34//
35
36#include "base/elf_mem_image.h"
37
38#ifdef HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
39
40#include <stddef.h>   // for size_t, ptrdiff_t
41#include "base/logging.h"
42
43// From binutils/include/elf/common.h (this doesn't appear to be documented
44// anywhere else).
45//
46//   /* This flag appears in a Versym structure.  It means that the symbol
47//      is hidden, and is only visible with an explicit version number.
48//      This is a GNU extension.  */
49//   #define VERSYM_HIDDEN           0x8000
50//
51//   /* This is the mask for the rest of the Versym information.  */
52//   #define VERSYM_VERSION          0x7fff
53
54#define VERSYM_VERSION 0x7fff
55
56namespace base {
57
58namespace {
59template <int N> class ElfClass {
60 public:
61  static const int kElfClass = -1;
62  static int ElfBind(const ElfW(Sym) *) {
63    CHECK(false); // << "Unexpected word size";
64    return 0;
65  }
66  static int ElfType(const ElfW(Sym) *) {
67    CHECK(false); // << "Unexpected word size";
68    return 0;
69  }
70};
71
72template <> class ElfClass<32> {
73 public:
74  static const int kElfClass = ELFCLASS32;
75  static int ElfBind(const ElfW(Sym) *symbol) {
76    return ELF32_ST_BIND(symbol->st_info);
77  }
78  static int ElfType(const ElfW(Sym) *symbol) {
79    return ELF32_ST_TYPE(symbol->st_info);
80  }
81};
82
83template <> class ElfClass<64> {
84 public:
85  static const int kElfClass = ELFCLASS64;
86  static int ElfBind(const ElfW(Sym) *symbol) {
87    return ELF64_ST_BIND(symbol->st_info);
88  }
89  static int ElfType(const ElfW(Sym) *symbol) {
90    return ELF64_ST_TYPE(symbol->st_info);
91  }
92};
93
94typedef ElfClass<__WORDSIZE> CurrentElfClass;
95
// Returns a pointer to entry number "index" of a table that starts
// "table_offset" bytes past "ehdr" and whose entries are "element_size"
// bytes apart, reinterpreted as a const T*.
// This is pure pointer arithmetic followed by a cast; no bounds checking
// is done here, so callers must validate "index" themselves.
template <class T>
const T* GetTableElement(const ElfW(Ehdr) *ehdr,
                         ElfW(Off) table_offset,
                         ElfW(Word) element_size,
                         size_t index) {
  const char *const image_start = reinterpret_cast<const char *>(ehdr);
  const char *const element =
      image_start + table_offset + index * element_size;
  return reinterpret_cast<const T*>(element);
}
108}  // namespace
109
110const void *const ElfMemImage::kInvalidBase =
111    reinterpret_cast<const void *>(~0L);
112
113ElfMemImage::ElfMemImage(const void *base) {
114  CHECK(base != kInvalidBase);
115  Init(base);
116}
117
118int ElfMemImage::GetNumSymbols() const {
119  if (!hash_) {
120    return 0;
121  }
122  // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
123  return hash_[1];
124}
125
126const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
127  CHECK_LT(index, GetNumSymbols());
128  return dynsym_ + index;
129}
130
131const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
132  CHECK_LT(index, GetNumSymbols());
133  return versym_ + index;
134}
135
136const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
137  CHECK_LT(index, ehdr_->e_phnum);
138  return GetTableElement<ElfW(Phdr)>(ehdr_,
139                                     ehdr_->e_phoff,
140                                     ehdr_->e_phentsize,
141                                     index);
142}
143
144const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
145  CHECK_LT(offset, strsize_);
146  return dynstr_ + offset;
147}
148
149const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
150  if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
151    // Symbol corresponds to "special" (e.g. SHN_ABS) section.
152    return reinterpret_cast<const void *>(sym->st_value);
153  }
154  CHECK_LT(link_base_, sym->st_value);
155  return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_;
156}
157
158const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
159  CHECK_LE(index, verdefnum_);
160  const ElfW(Verdef) *version_definition = verdef_;
161  while (version_definition->vd_ndx < index && version_definition->vd_next) {
162    const char *const version_definition_as_char =
163        reinterpret_cast<const char *>(version_definition);
164    version_definition =
165        reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
166                                               version_definition->vd_next);
167  }
168  return version_definition->vd_ndx == index ? version_definition : NULL;
169}
170
171const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
172    const ElfW(Verdef) *verdef) const {
173  return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
174}
175
176const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
177  CHECK_LT(offset, strsize_);
178  return dynstr_ + offset;
179}
180
181void ElfMemImage::Init(const void *base) {
182  ehdr_      = NULL;
183  dynsym_    = NULL;
184  dynstr_    = NULL;
185  versym_    = NULL;
186  verdef_    = NULL;
187  hash_      = NULL;
188  strsize_   = 0;
189  verdefnum_ = 0;
190  link_base_ = ~0L;  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
191  if (!base) {
192    return;
193  }
194  const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base);
195  // Fake VDSO has low bit set.
196  const bool fake_vdso = ((base_as_uintptr_t & 1) != 0);
197  base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1);
198  const char *const base_as_char = reinterpret_cast<const char *>(base);
199  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
200      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
201    RAW_DCHECK(false, "no ELF magic"); // at %p", base);
202    return;
203  }
204  int elf_class = base_as_char[EI_CLASS];
205  if (elf_class != CurrentElfClass::kElfClass) {
206    DCHECK_EQ(elf_class, CurrentElfClass::kElfClass);
207    return;
208  }
209  switch (base_as_char[EI_DATA]) {
210    case ELFDATA2LSB: {
211      if (__LITTLE_ENDIAN != __BYTE_ORDER) {
212        DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
213        return;
214      }
215      break;
216    }
217    case ELFDATA2MSB: {
218      if (__BIG_ENDIAN != __BYTE_ORDER) {
219        DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order";
220        return;
221      }
222      break;
223    }
224    default: {
225      RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA];
226      return;
227    }
228  }
229
230  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
231  const ElfW(Phdr) *dynamic_program_header = NULL;
232  for (int i = 0; i < ehdr_->e_phnum; ++i) {
233    const ElfW(Phdr) *const program_header = GetPhdr(i);
234    switch (program_header->p_type) {
235      case PT_LOAD:
236        if (link_base_ == ~0L) {
237          link_base_ = program_header->p_vaddr;
238        }
239        break;
240      case PT_DYNAMIC:
241        dynamic_program_header = program_header;
242        break;
243    }
244  }
245  if (link_base_ == ~0L || !dynamic_program_header) {
246    RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO");
247    RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO");
248    // Mark this image as not present. Can not recur infinitely.
249    Init(0);
250    return;
251  }
252  ptrdiff_t relocation =
253      base_as_char - reinterpret_cast<const char *>(link_base_);
254  ElfW(Dyn) *dynamic_entry =
255      reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
256                                    relocation);
257  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
258    ElfW(Xword) value = dynamic_entry->d_un.d_val;
259    if (fake_vdso) {
260      // A complication: in the real VDSO, dynamic entries are not relocated
261      // (it wasn't loaded by a dynamic loader). But when testing with a
262      // "fake" dlopen()ed vdso library, the loader relocates some (but
263      // not all!) of them before we get here.
264      if (dynamic_entry->d_tag == DT_VERDEF) {
265        // The only dynamic entry (of the ones we care about) libc-2.3.6
266        // loader doesn't relocate.
267        value += relocation;
268      }
269    } else {
270      // Real VDSO. Everything needs to be relocated.
271      value += relocation;
272    }
273    switch (dynamic_entry->d_tag) {
274      case DT_HASH:
275        hash_ = reinterpret_cast<ElfW(Word) *>(value);
276        break;
277      case DT_SYMTAB:
278        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
279        break;
280      case DT_STRTAB:
281        dynstr_ = reinterpret_cast<const char *>(value);
282        break;
283      case DT_VERSYM:
284        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
285        break;
286      case DT_VERDEF:
287        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
288        break;
289      case DT_VERDEFNUM:
290        verdefnum_ = dynamic_entry->d_un.d_val;
291        break;
292      case DT_STRSZ:
293        strsize_ = dynamic_entry->d_un.d_val;
294        break;
295      default:
296        // Unrecognized entries explicitly ignored.
297        break;
298    }
299  }
300  if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
301      !verdef_ || !verdefnum_ || !strsize_) {
302    RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)");
303    RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)");
304    RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)");
305    RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)");
306    RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)");
307    RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)");
308    RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)");
309    // Mark this image as not present. Can not recur infinitely.
310    Init(0);
311    return;
312  }
313}
314
315bool ElfMemImage::LookupSymbol(const char *name,
316                               const char *version,
317                               int type,
318                               SymbolInfo *info) const {
319  for (SymbolIterator it = begin(); it != end(); ++it) {
320    if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 &&
321        CurrentElfClass::ElfType(it->symbol) == type) {
322      if (info) {
323        *info = *it;
324      }
325      return true;
326    }
327  }
328  return false;
329}
330
331bool ElfMemImage::LookupSymbolByAddress(const void *address,
332                                        SymbolInfo *info_out) const {
333  for (SymbolIterator it = begin(); it != end(); ++it) {
334    const char *const symbol_start =
335        reinterpret_cast<const char *>(it->address);
336    const char *const symbol_end = symbol_start + it->symbol->st_size;
337    if (symbol_start <= address && address < symbol_end) {
338      if (info_out) {
339        // Client wants to know details for that symbol (the usual case).
340        if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) {
341          // Strong symbol; just return it.
342          *info_out = *it;
343          return true;
344        } else {
345          // Weak or local. Record it, but keep looking for a strong one.
346          *info_out = *it;
347        }
348      } else {
349        // Client only cares if there is an overlapping symbol.
350        return true;
351      }
352    }
353  }
354  return false;
355}
356
357ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
358    : index_(index), image_(image) {
359}
360
361const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
362  return &info_;
363}
364
365const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
366  return info_;
367}
368
369bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
370  return this->image_ == rhs.image_ && this->index_ == rhs.index_;
371}
372
373bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
374  return !(*this == rhs);
375}
376
377ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
378  this->Update(1);
379  return *this;
380}
381
382ElfMemImage::SymbolIterator ElfMemImage::begin() const {
383  SymbolIterator it(this, 0);
384  it.Update(0);
385  return it;
386}
387
388ElfMemImage::SymbolIterator ElfMemImage::end() const {
389  return SymbolIterator(this, GetNumSymbols());
390}
391
392void ElfMemImage::SymbolIterator::Update(int increment) {
393  const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
394  CHECK(image->IsPresent() || increment == 0);
395  if (!image->IsPresent()) {
396    return;
397  }
398  index_ += increment;
399  if (index_ >= image->GetNumSymbols()) {
400    index_ = image->GetNumSymbols();
401    return;
402  }
403  const ElfW(Sym)    *symbol = image->GetDynsym(index_);
404  const ElfW(Versym) *version_symbol = image->GetVersym(index_);
405  CHECK(symbol && version_symbol);
406  const char *const symbol_name = image->GetDynstr(symbol->st_name);
407  const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
408  const ElfW(Verdef) *version_definition = NULL;
409  const char *version_name = "";
410  if (symbol->st_shndx == SHN_UNDEF) {
411    // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
412    // version_index could well be greater than verdefnum_, so calling
413    // GetVerdef(version_index) may trigger assertion.
414  } else {
415    version_definition = image->GetVerdef(version_index);
416  }
417  if (version_definition) {
418    // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
419    // optional 2nd if the version has a parent.
420    CHECK_LE(1, version_definition->vd_cnt);
421    CHECK_LE(version_definition->vd_cnt, 2);
422    const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
423    version_name = image->GetVerstr(version_aux->vda_name);
424  }
425  info_.name    = symbol_name;
426  info_.version = version_name;
427  info_.address = image->GetSymAddr(symbol);
428  info_.symbol  = symbol;
429}
430
431}  // namespace base
432
433#endif  // HAVE_ELF_MEM_IMAGE
434