// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Paul Pluzhnikov
//
// Allow dynamic symbol lookup in an in-memory Elf image.
//

#include "base/elf_mem_image.h"

#ifdef HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h

#include <stddef.h>   // for size_t, ptrdiff_t
#include "base/logging.h"

// From binutils/include/elf/common.h (this doesn't appear to be documented
// anywhere else).
45// 46// /* This flag appears in a Versym structure. It means that the symbol 47// is hidden, and is only visible with an explicit version number. 48// This is a GNU extension. */ 49// #define VERSYM_HIDDEN 0x8000 50// 51// /* This is the mask for the rest of the Versym information. */ 52// #define VERSYM_VERSION 0x7fff 53 54#define VERSYM_VERSION 0x7fff 55 56namespace base { 57 58namespace { 59template <int N> class ElfClass { 60 public: 61 static const int kElfClass = -1; 62 static int ElfBind(const ElfW(Sym) *) { 63 CHECK(false); // << "Unexpected word size"; 64 return 0; 65 } 66 static int ElfType(const ElfW(Sym) *) { 67 CHECK(false); // << "Unexpected word size"; 68 return 0; 69 } 70}; 71 72template <> class ElfClass<32> { 73 public: 74 static const int kElfClass = ELFCLASS32; 75 static int ElfBind(const ElfW(Sym) *symbol) { 76 return ELF32_ST_BIND(symbol->st_info); 77 } 78 static int ElfType(const ElfW(Sym) *symbol) { 79 return ELF32_ST_TYPE(symbol->st_info); 80 } 81}; 82 83template <> class ElfClass<64> { 84 public: 85 static const int kElfClass = ELFCLASS64; 86 static int ElfBind(const ElfW(Sym) *symbol) { 87 return ELF64_ST_BIND(symbol->st_info); 88 } 89 static int ElfType(const ElfW(Sym) *symbol) { 90 return ELF64_ST_TYPE(symbol->st_info); 91 } 92}; 93 94typedef ElfClass<__WORDSIZE> CurrentElfClass; 95 96// Extract an element from one of the ELF tables, cast it to desired type. 97// This is just a simple arithmetic and a glorified cast. 98// Callers are responsible for bounds checking. 
99template <class T> 100const T* GetTableElement(const ElfW(Ehdr) *ehdr, 101 ElfW(Off) table_offset, 102 ElfW(Word) element_size, 103 size_t index) { 104 return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) 105 + table_offset 106 + index * element_size); 107} 108} // namespace 109 110const void *const ElfMemImage::kInvalidBase = 111 reinterpret_cast<const void *>(~0L); 112 113ElfMemImage::ElfMemImage(const void *base) { 114 CHECK(base != kInvalidBase); 115 Init(base); 116} 117 118int ElfMemImage::GetNumSymbols() const { 119 if (!hash_) { 120 return 0; 121 } 122 // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash 123 return hash_[1]; 124} 125 126const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { 127 CHECK_LT(index, GetNumSymbols()); 128 return dynsym_ + index; 129} 130 131const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { 132 CHECK_LT(index, GetNumSymbols()); 133 return versym_ + index; 134} 135 136const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { 137 CHECK_LT(index, ehdr_->e_phnum); 138 return GetTableElement<ElfW(Phdr)>(ehdr_, 139 ehdr_->e_phoff, 140 ehdr_->e_phentsize, 141 index); 142} 143 144const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { 145 CHECK_LT(offset, strsize_); 146 return dynstr_ + offset; 147} 148 149const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { 150 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { 151 // Symbol corresponds to "special" (e.g. SHN_ABS) section. 
152 return reinterpret_cast<const void *>(sym->st_value); 153 } 154 CHECK_LT(link_base_, sym->st_value); 155 return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_; 156} 157 158const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { 159 CHECK_LE(index, verdefnum_); 160 const ElfW(Verdef) *version_definition = verdef_; 161 while (version_definition->vd_ndx < index && version_definition->vd_next) { 162 const char *const version_definition_as_char = 163 reinterpret_cast<const char *>(version_definition); 164 version_definition = 165 reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + 166 version_definition->vd_next); 167 } 168 return version_definition->vd_ndx == index ? version_definition : NULL; 169} 170 171const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( 172 const ElfW(Verdef) *verdef) const { 173 return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); 174} 175 176const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { 177 CHECK_LT(offset, strsize_); 178 return dynstr_ + offset; 179} 180 181void ElfMemImage::Init(const void *base) { 182 ehdr_ = NULL; 183 dynsym_ = NULL; 184 dynstr_ = NULL; 185 versym_ = NULL; 186 verdef_ = NULL; 187 hash_ = NULL; 188 strsize_ = 0; 189 verdefnum_ = 0; 190 link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. 191 if (!base) { 192 return; 193 } 194 const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base); 195 // Fake VDSO has low bit set. 
196 const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); 197 base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1); 198 const char *const base_as_char = reinterpret_cast<const char *>(base); 199 if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || 200 base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { 201 RAW_DCHECK(false, "no ELF magic"); // at %p", base); 202 return; 203 } 204 int elf_class = base_as_char[EI_CLASS]; 205 if (elf_class != CurrentElfClass::kElfClass) { 206 DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); 207 return; 208 } 209 switch (base_as_char[EI_DATA]) { 210 case ELFDATA2LSB: { 211 if (__LITTLE_ENDIAN != __BYTE_ORDER) { 212 DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; 213 return; 214 } 215 break; 216 } 217 case ELFDATA2MSB: { 218 if (__BIG_ENDIAN != __BYTE_ORDER) { 219 DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; 220 return; 221 } 222 break; 223 } 224 default: { 225 RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]; 226 return; 227 } 228 } 229 230 ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); 231 const ElfW(Phdr) *dynamic_program_header = NULL; 232 for (int i = 0; i < ehdr_->e_phnum; ++i) { 233 const ElfW(Phdr) *const program_header = GetPhdr(i); 234 switch (program_header->p_type) { 235 case PT_LOAD: 236 if (link_base_ == ~0L) { 237 link_base_ = program_header->p_vaddr; 238 } 239 break; 240 case PT_DYNAMIC: 241 dynamic_program_header = program_header; 242 break; 243 } 244 } 245 if (link_base_ == ~0L || !dynamic_program_header) { 246 RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); 247 RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); 248 // Mark this image as not present. Can not recur infinitely. 
249 Init(0); 250 return; 251 } 252 ptrdiff_t relocation = 253 base_as_char - reinterpret_cast<const char *>(link_base_); 254 ElfW(Dyn) *dynamic_entry = 255 reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + 256 relocation); 257 for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { 258 ElfW(Xword) value = dynamic_entry->d_un.d_val; 259 if (fake_vdso) { 260 // A complication: in the real VDSO, dynamic entries are not relocated 261 // (it wasn't loaded by a dynamic loader). But when testing with a 262 // "fake" dlopen()ed vdso library, the loader relocates some (but 263 // not all!) of them before we get here. 264 if (dynamic_entry->d_tag == DT_VERDEF) { 265 // The only dynamic entry (of the ones we care about) libc-2.3.6 266 // loader doesn't relocate. 267 value += relocation; 268 } 269 } else { 270 // Real VDSO. Everything needs to be relocated. 271 value += relocation; 272 } 273 switch (dynamic_entry->d_tag) { 274 case DT_HASH: 275 hash_ = reinterpret_cast<ElfW(Word) *>(value); 276 break; 277 case DT_SYMTAB: 278 dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); 279 break; 280 case DT_STRTAB: 281 dynstr_ = reinterpret_cast<const char *>(value); 282 break; 283 case DT_VERSYM: 284 versym_ = reinterpret_cast<ElfW(Versym) *>(value); 285 break; 286 case DT_VERDEF: 287 verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); 288 break; 289 case DT_VERDEFNUM: 290 verdefnum_ = dynamic_entry->d_un.d_val; 291 break; 292 case DT_STRSZ: 293 strsize_ = dynamic_entry->d_un.d_val; 294 break; 295 default: 296 // Unrecognized entries explicitly ignored. 
297 break; 298 } 299 } 300 if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || 301 !verdef_ || !verdefnum_ || !strsize_) { 302 RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); 303 RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); 304 RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); 305 RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); 306 RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); 307 RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); 308 RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); 309 // Mark this image as not present. Can not recur infinitely. 310 Init(0); 311 return; 312 } 313} 314 315bool ElfMemImage::LookupSymbol(const char *name, 316 const char *version, 317 int type, 318 SymbolInfo *info) const { 319 for (SymbolIterator it = begin(); it != end(); ++it) { 320 if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && 321 CurrentElfClass::ElfType(it->symbol) == type) { 322 if (info) { 323 *info = *it; 324 } 325 return true; 326 } 327 } 328 return false; 329} 330 331bool ElfMemImage::LookupSymbolByAddress(const void *address, 332 SymbolInfo *info_out) const { 333 for (SymbolIterator it = begin(); it != end(); ++it) { 334 const char *const symbol_start = 335 reinterpret_cast<const char *>(it->address); 336 const char *const symbol_end = symbol_start + it->symbol->st_size; 337 if (symbol_start <= address && address < symbol_end) { 338 if (info_out) { 339 // Client wants to know details for that symbol (the usual case). 340 if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { 341 // Strong symbol; just return it. 342 *info_out = *it; 343 return true; 344 } else { 345 // Weak or local. Record it, but keep looking for a strong one. 346 *info_out = *it; 347 } 348 } else { 349 // Client only cares if there is an overlapping symbol. 
350 return true; 351 } 352 } 353 } 354 return false; 355} 356 357ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) 358 : index_(index), image_(image) { 359} 360 361const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { 362 return &info_; 363} 364 365const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { 366 return info_; 367} 368 369bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { 370 return this->image_ == rhs.image_ && this->index_ == rhs.index_; 371} 372 373bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { 374 return !(*this == rhs); 375} 376 377ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { 378 this->Update(1); 379 return *this; 380} 381 382ElfMemImage::SymbolIterator ElfMemImage::begin() const { 383 SymbolIterator it(this, 0); 384 it.Update(0); 385 return it; 386} 387 388ElfMemImage::SymbolIterator ElfMemImage::end() const { 389 return SymbolIterator(this, GetNumSymbols()); 390} 391 392void ElfMemImage::SymbolIterator::Update(int increment) { 393 const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); 394 CHECK(image->IsPresent() || increment == 0); 395 if (!image->IsPresent()) { 396 return; 397 } 398 index_ += increment; 399 if (index_ >= image->GetNumSymbols()) { 400 index_ = image->GetNumSymbols(); 401 return; 402 } 403 const ElfW(Sym) *symbol = image->GetDynsym(index_); 404 const ElfW(Versym) *version_symbol = image->GetVersym(index_); 405 CHECK(symbol && version_symbol); 406 const char *const symbol_name = image->GetDynstr(symbol->st_name); 407 const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; 408 const ElfW(Verdef) *version_definition = NULL; 409 const char *version_name = ""; 410 if (symbol->st_shndx == SHN_UNDEF) { 411 // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and 412 // version_index could well be greater than verdefnum_, so calling 
413 // GetVerdef(version_index) may trigger assertion. 414 } else { 415 version_definition = image->GetVerdef(version_index); 416 } 417 if (version_definition) { 418 // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, 419 // optional 2nd if the version has a parent. 420 CHECK_LE(1, version_definition->vd_cnt); 421 CHECK_LE(version_definition->vd_cnt, 2); 422 const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); 423 version_name = image->GetVerstr(version_aux->vda_name); 424 } 425 info_.name = symbol_name; 426 info_.version = version_name; 427 info_.address = image->GetSymAddr(symbol); 428 info_.symbol = symbol; 429} 430 431} // namespace base 432 433#endif // HAVE_ELF_MEM_IMAGE 434