1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "read_elf.h"
18#include "read_apk.h"
19
20#include <stdio.h>
21#include <string.h>
22#include <sys/stat.h>
23#include <sys/types.h>
24
25#include <algorithm>
26#include <limits>
27
28#include <android-base/file.h>
29#include <android-base/logging.h>
30
31#pragma clang diagnostic push
32#pragma clang diagnostic ignored "-Wunused-parameter"
33
34#include <llvm/ADT/StringRef.h>
35#include <llvm/Object/Binary.h>
36#include <llvm/Object/ELFObjectFile.h>
37#include <llvm/Object/ObjectFile.h>
38
39#pragma clang diagnostic pop
40
41#include "utils.h"
42
43#define ELF_NOTE_GNU "GNU"
44#define NT_GNU_BUILD_ID 3
45
46std::ostream& operator<<(std::ostream& os, const ElfStatus& status) {
47  switch (status) {
48    case ElfStatus::NO_ERROR:
49      os << "No error";
50      break;
51    case ElfStatus::FILE_NOT_FOUND:
52      os << "File not found";
53      break;
54    case ElfStatus::READ_FAILED:
55      os << "Read failed";
56      break;
57    case ElfStatus::FILE_MALFORMED:
58      os << "Malformed file";
59      break;
60    case ElfStatus::NO_SYMBOL_TABLE:
61      os << "No symbol table";
62      break;
63    case ElfStatus::NO_BUILD_ID:
64      os << "No build id";
65      break;
66    case ElfStatus::BUILD_ID_MISMATCH:
67      os << "Build id mismatch";
68      break;
69    case ElfStatus::SECTION_NOT_FOUND:
70      os << "Section not found";
71      break;
72  }
73  return os;
74}
75
76ElfStatus IsValidElfFile(int fd) {
77  static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
78  char buf[4];
79  if (!android::base::ReadFully(fd, buf, 4)) {
80    return ElfStatus::READ_FAILED;
81  }
82  if (memcmp(buf, elf_magic, 4) != 0) {
83    return ElfStatus::FILE_MALFORMED;
84  }
85  return ElfStatus::NO_ERROR;
86}
87
88ElfStatus IsValidElfPath(const std::string& filename) {
89  if (!IsRegularFile(filename)) {
90    return ElfStatus::FILE_NOT_FOUND;
91  }
92  std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
93  FILE* fp = fopen(filename.c_str(), mode.c_str());
94  if (fp == nullptr) {
95    return ElfStatus::READ_FAILED;
96  }
97  ElfStatus result = IsValidElfFile(fileno(fp));
98  fclose(fp);
99  return result;
100}
101
102bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) {
103  const char* p = section;
104  const char* end = p + section_size;
105  while (p < end) {
106    if (p + 12 >= end) {
107      return false;
108    }
109    uint32_t namesz;
110    uint32_t descsz;
111    uint32_t type;
112    MoveFromBinaryFormat(namesz, p);
113    MoveFromBinaryFormat(descsz, p);
114    MoveFromBinaryFormat(type, p);
115    namesz = Align(namesz, 4);
116    descsz = Align(descsz, 4);
117    if ((type == NT_GNU_BUILD_ID) && (p < end) && (strcmp(p, ELF_NOTE_GNU) == 0)) {
118      const char* desc_start = p + namesz;
119      const char* desc_end = desc_start + descsz;
120      if (desc_start > p && desc_start < desc_end && desc_end <= end) {
121        *build_id = BuildId(p + namesz, descsz);
122        return true;
123      } else {
124        return false;
125      }
126    }
127    p += namesz + descsz;
128  }
129  return false;
130}
131
132ElfStatus GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) {
133  std::string content;
134  if (!android::base::ReadFileToString(filename, &content)) {
135    return ElfStatus::READ_FAILED;
136  }
137  if (!GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id)) {
138    return ElfStatus::NO_BUILD_ID;
139  }
140  return ElfStatus::NO_ERROR;
141}
142
143template <class ELFT>
144ElfStatus GetBuildIdFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, BuildId* build_id) {
145  for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
146    const llvm::object::ELFSectionRef& section_ref = *it;
147    if (section_ref.getType() == llvm::ELF::SHT_NOTE) {
148      llvm::StringRef data;
149      if (it->getContents(data)) {
150        return ElfStatus::READ_FAILED;
151      }
152      if (GetBuildIdFromNoteSection(data.data(), data.size(), build_id)) {
153        return ElfStatus::NO_ERROR;
154      }
155    }
156  }
157  return ElfStatus::NO_BUILD_ID;
158}
159
160static ElfStatus GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
161  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
162    return GetBuildIdFromELFFile(elf, build_id);
163  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
164    return GetBuildIdFromELFFile(elf, build_id);
165  }
166  return ElfStatus::FILE_MALFORMED;
167}
168
169struct BinaryWrapper {
170  llvm::object::OwningBinary<llvm::object::Binary> binary;
171  llvm::object::ObjectFile* obj;
172
173  BinaryWrapper() : obj(nullptr) {
174  }
175};
176
177static ElfStatus OpenObjectFile(const std::string& filename, uint64_t file_offset,
178                                uint64_t file_size, BinaryWrapper* wrapper) {
179  FileHelper fhelper = FileHelper::OpenReadOnly(filename);
180  if (!fhelper) {
181    return ElfStatus::READ_FAILED;
182  }
183  if (file_size == 0) {
184    file_size = GetFileSize(filename);
185    if (file_size == 0) {
186      return ElfStatus::READ_FAILED;
187    }
188  }
189  auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset);
190  if (!buffer_or_err) {
191    return ElfStatus::READ_FAILED;
192  }
193  auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef());
194  if (!binary_or_err) {
195    return ElfStatus::READ_FAILED;
196  }
197  wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
198                                                                        std::move(buffer_or_err.get()));
199  wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());
200  if (wrapper->obj == nullptr) {
201    return ElfStatus::FILE_MALFORMED;
202  }
203  return ElfStatus::NO_ERROR;
204}
205
206static ElfStatus OpenObjectFileFromString(const std::string& s, BinaryWrapper* wrapper) {
207  auto buffer = llvm::MemoryBuffer::getMemBuffer(s);
208  auto binary_or_err = llvm::object::createBinary(buffer->getMemBufferRef());
209  if (!binary_or_err) {
210    return ElfStatus::FILE_MALFORMED;
211  }
212  wrapper->binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
213                                                                std::move(buffer));
214  wrapper->obj = llvm::dyn_cast<llvm::object::ObjectFile>(wrapper->binary.getBinary());
215  if (wrapper->obj == nullptr) {
216    return ElfStatus::FILE_MALFORMED;
217  }
218  return ElfStatus::NO_ERROR;
219}
220
221ElfStatus GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
222  ElfStatus result = IsValidElfPath(filename);
223  if (result != ElfStatus::NO_ERROR) {
224    return result;
225  }
226  return GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id);
227}
228
229ElfStatus GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
230                                        uint32_t file_size, BuildId* build_id) {
231  BinaryWrapper wrapper;
232  ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper);
233  if (result != ElfStatus::NO_ERROR) {
234    return result;
235  }
236  return GetBuildIdFromObjectFile(wrapper.obj, build_id);
237}
238
239template <class ELFT>
240ElfStatus ReadSectionFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf, const std::string& section_name,
241                                 std::string* content) {
242  for (llvm::object::section_iterator it = elf->section_begin(); it != elf->section_end(); ++it) {
243    llvm::StringRef name;
244    if (it->getName(name) || name != section_name) {
245      continue;
246    }
247    llvm::StringRef data;
248    std::error_code err = it->getContents(data);
249    if (err) {
250      return ElfStatus::READ_FAILED;
251    }
252    *content = data;
253    return ElfStatus::NO_ERROR;
254  }
255  return ElfStatus::SECTION_NOT_FOUND;
256}
257
// Returns true if name is an ARM mapping symbol, as described in "ELF for ARM
// Architecture" and "ELF for ARM 64-bit Architecture". The regular expression
// matching a mapping symbol is ^\$(a|d|t|x)(\..*)?$
//
// name must point to a NUL-terminated string.
bool IsArmMappingSymbol(const char* name) {
  if (name[0] != '$') {
    return false;
  }
  const char kind = name[1];
  // strchr() also matches the terminating NUL of its haystack, so reject an
  // empty kind explicitly. Otherwise "$" would pass this test and name[2]
  // would be read past the end of the string.
  if (kind == '\0' || strchr("adtx", kind) == nullptr) {
    return false;
  }
  return name[2] == '\0' || name[2] == '.';
}
264
265void ReadSymbolTable(llvm::object::symbol_iterator sym_begin,
266                     llvm::object::symbol_iterator sym_end,
267                     const std::function<void(const ElfFileSymbol&)>& callback,
268                     bool is_arm) {
269  for (; sym_begin != sym_end; ++sym_begin) {
270    ElfFileSymbol symbol;
271    auto symbol_ref = static_cast<const llvm::object::ELFSymbolRef*>(&*sym_begin);
272    llvm::Expected<llvm::object::section_iterator> section_it_or_err = symbol_ref->getSection();
273    if (!section_it_or_err) {
274      continue;
275    }
276
277    llvm::StringRef section_name;
278    if (section_it_or_err.get()->getName(section_name) || section_name.empty()) {
279      continue;
280    }
281    if (section_name == ".text") {
282      symbol.is_in_text_section = true;
283    }
284    llvm::Expected<llvm::StringRef> symbol_name_or_err = symbol_ref->getName();
285    if (!symbol_name_or_err || symbol_name_or_err.get().empty()) {
286      continue;
287    }
288
289    symbol.name = symbol_name_or_err.get();
290    symbol.vaddr = symbol_ref->getValue();
291    if ((symbol.vaddr & 1) != 0 && is_arm) {
292      // Arm sets bit 0 to mark it as thumb code, remove the flag.
293      symbol.vaddr &= ~1;
294    }
295    symbol.len = symbol_ref->getSize();
296    llvm::object::SymbolRef::Type symbol_type = *symbol_ref->getType();
297    if (symbol_type == llvm::object::SymbolRef::ST_Function) {
298      symbol.is_func = true;
299    } else if (symbol_type == llvm::object::SymbolRef::ST_Unknown) {
300      if (symbol.is_in_text_section) {
301        symbol.is_label = true;
302        if (is_arm) {
303          // Remove mapping symbols in arm.
304          const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0)
305                              ? symbol.name.c_str() + linker_prefix.size()
306                              : symbol.name.c_str();
307          if (IsArmMappingSymbol(p)) {
308            symbol.is_label = false;
309          }
310        }
311      }
312    }
313
314    callback(symbol);
315  }
316}
317
318template <class ELFT>
319void AddSymbolForPltSection(const llvm::object::ELFObjectFile<ELFT>* elf,
320                            const std::function<void(const ElfFileSymbol&)>& callback) {
321  // We may sample instructions in .plt section if the program
322  // calls functions from shared libraries. Different architectures use
323  // different formats to store .plt section, so it needs a lot of work to match
324  // instructions in .plt section to symbols. As samples in .plt section rarely
325  // happen, and .plt section can hardly be a performance bottleneck, we can
326  // just use a symbol @plt to represent instructions in .plt section.
327  for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
328    const llvm::object::ELFSectionRef& section_ref = *it;
329    llvm::StringRef section_name;
330    std::error_code err = section_ref.getName(section_name);
331    if (err || section_name != ".plt") {
332      continue;
333    }
334    const auto* shdr = elf->getSection(section_ref.getRawDataRefImpl());
335    if (shdr == nullptr) {
336      return;
337    }
338    ElfFileSymbol symbol;
339    symbol.vaddr = shdr->sh_addr;
340    symbol.len = shdr->sh_size;
341    symbol.is_func = true;
342    symbol.is_label = true;
343    symbol.is_in_text_section = true;
344    symbol.name = "@plt";
345    callback(symbol);
346    return;
347  }
348}
349
350template <class ELFT>
351ElfStatus ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf,
352                                  const std::function<void(const ElfFileSymbol&)>& callback) {
353  auto machine = elf->getELFFile()->getHeader()->e_machine;
354  bool is_arm = (machine == llvm::ELF::EM_ARM || machine == llvm::ELF::EM_AARCH64);
355  AddSymbolForPltSection(elf, callback);
356  if (elf->symbol_begin() != elf->symbol_end()) {
357    ReadSymbolTable(elf->symbol_begin(), elf->symbol_end(), callback, is_arm);
358    return ElfStatus::NO_ERROR;
359  } else if (elf->dynamic_symbol_begin()->getRawDataRefImpl() != llvm::object::DataRefImpl()) {
360    ReadSymbolTable(elf->dynamic_symbol_begin(), elf->dynamic_symbol_end(), callback, is_arm);
361  }
362  std::string debugdata;
363  ElfStatus result = ReadSectionFromELFFile(elf, ".gnu_debugdata", &debugdata);
364  if (result == ElfStatus::SECTION_NOT_FOUND) {
365    return ElfStatus::NO_SYMBOL_TABLE;
366  } else if (result == ElfStatus::NO_ERROR) {
367    std::string decompressed_data;
368    if (XzDecompress(debugdata, &decompressed_data)) {
369      BinaryWrapper wrapper;
370      result = OpenObjectFileFromString(decompressed_data, &wrapper);
371      if (result == ElfStatus::NO_ERROR) {
372        if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
373          return ParseSymbolsFromELFFile(elf, callback);
374        } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
375          return ParseSymbolsFromELFFile(elf, callback);
376        } else {
377          return ElfStatus::FILE_MALFORMED;
378        }
379      }
380    }
381  }
382  return result;
383}
384
385ElfStatus MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id) {
386  if (expected_build_id.IsEmpty()) {
387    return ElfStatus::NO_ERROR;
388  }
389  BuildId real_build_id;
390  ElfStatus result = GetBuildIdFromObjectFile(obj, &real_build_id);
391  if (result != ElfStatus::NO_ERROR) {
392    return result;
393  }
394  if (expected_build_id != real_build_id) {
395    return ElfStatus::BUILD_ID_MISMATCH;
396  }
397  return ElfStatus::NO_ERROR;
398}
399
400ElfStatus ParseSymbolsFromElfFile(const std::string& filename,
401                                  const BuildId& expected_build_id,
402                                  const std::function<void(const ElfFileSymbol&)>& callback) {
403  ElfStatus result = IsValidElfPath(filename);
404  if (result != ElfStatus::NO_ERROR) {
405    return result;
406  }
407  return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback);
408}
409
410ElfStatus ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
411                                     uint32_t file_size, const BuildId& expected_build_id,
412                                     const std::function<void(const ElfFileSymbol&)>& callback) {
413  BinaryWrapper wrapper;
414  ElfStatus result = OpenObjectFile(filename, file_offset, file_size, &wrapper);
415  if (result != ElfStatus::NO_ERROR) {
416    return result;
417  }
418  result = MatchBuildId(wrapper.obj, expected_build_id);
419  if (result != ElfStatus::NO_ERROR) {
420    return result;
421  }
422  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
423    return ParseSymbolsFromELFFile(elf, callback);
424  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
425    return ParseSymbolsFromELFFile(elf, callback);
426  }
427  return ElfStatus::FILE_MALFORMED;
428}
429
430template <class ELFT>
431ElfStatus ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) {
432  bool has_vaddr = false;
433  uint64_t min_addr = std::numeric_limits<uint64_t>::max();
434  for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) {
435    if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) {
436      if (it->p_vaddr < min_addr) {
437        min_addr = it->p_vaddr;
438        has_vaddr = true;
439      }
440    }
441  }
442  if (!has_vaddr) {
443    return ElfStatus::FILE_MALFORMED;
444  }
445  *p_vaddr = min_addr;
446  return ElfStatus::NO_ERROR;
447}
448
449ElfStatus ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
450                                                     const BuildId& expected_build_id,
451                                                     uint64_t* min_vaddr) {
452  ElfStatus result = IsValidElfPath(filename);
453  if (result != ElfStatus::NO_ERROR) {
454    return result;
455  }
456  BinaryWrapper wrapper;
457  result = OpenObjectFile(filename, 0, 0, &wrapper);
458  if (result != ElfStatus::NO_ERROR) {
459    return result;
460  }
461  result = MatchBuildId(wrapper.obj, expected_build_id);
462  if (result != ElfStatus::NO_ERROR) {
463    return result;
464  }
465
466  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
467    return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
468  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
469    return ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
470  } else {
471    return ElfStatus::FILE_MALFORMED;
472  }
473}
474
475ElfStatus ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
476                                 std::string* content) {
477  ElfStatus result = IsValidElfPath(filename);
478  if (result != ElfStatus::NO_ERROR) {
479    return result;
480  }
481  BinaryWrapper wrapper;
482  result = OpenObjectFile(filename, 0, 0, &wrapper);
483  if (result != ElfStatus::NO_ERROR) {
484    return result;
485  }
486  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(wrapper.obj)) {
487    return ReadSectionFromELFFile(elf, section_name, content);
488  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(wrapper.obj)) {
489    return ReadSectionFromELFFile(elf, section_name, content);
490  } else {
491    return ElfStatus::FILE_MALFORMED;
492  }
493}
494