1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "read_elf.h"
18#include "read_apk.h"
19
20#include <stdio.h>
21#include <string.h>
22#include <sys/stat.h>
23#include <sys/types.h>
24
25#include <algorithm>
26#include <limits>
27
28#include <android-base/file.h>
29#include <android-base/logging.h>
30
31#pragma clang diagnostic push
32#pragma clang diagnostic ignored "-Wunused-parameter"
33
34#include <llvm/ADT/StringRef.h>
35#include <llvm/Object/Binary.h>
36#include <llvm/Object/ELFObjectFile.h>
37#include <llvm/Object/ObjectFile.h>
38
39#pragma clang diagnostic pop
40
41#include "utils.h"
42
43#define ELF_NOTE_GNU "GNU"
44#define NT_GNU_BUILD_ID 3
45
46
47bool IsValidElfFile(int fd) {
48  static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
49  char buf[4];
50  return android::base::ReadFully(fd, buf, 4) && memcmp(buf, elf_magic, 4) == 0;
51}
52
53bool IsValidElfPath(const std::string& filename) {
54  if (!IsRegularFile(filename)) {
55    return false;
56  }
57  std::string mode = std::string("rb") + CLOSE_ON_EXEC_MODE;
58  FILE* fp = fopen(filename.c_str(), mode.c_str());
59  if (fp == nullptr) {
60    return false;
61  }
62  bool result = IsValidElfFile(fileno(fp));
63  fclose(fp);
64  return result;
65}
66
67static bool GetBuildIdFromNoteSection(const char* section, size_t section_size, BuildId* build_id) {
68  const char* p = section;
69  const char* end = p + section_size;
70  while (p < end) {
71    CHECK_LE(p + 12, end);
72    size_t namesz = *reinterpret_cast<const uint32_t*>(p);
73    p += 4;
74    size_t descsz = *reinterpret_cast<const uint32_t*>(p);
75    p += 4;
76    uint32_t type = *reinterpret_cast<const uint32_t*>(p);
77    p += 4;
78    namesz = ALIGN(namesz, 4);
79    descsz = ALIGN(descsz, 4);
80    CHECK_LE(p + namesz + descsz, end);
81    if ((type == NT_GNU_BUILD_ID) && (strcmp(p, ELF_NOTE_GNU) == 0)) {
82      *build_id = BuildId(p + namesz, descsz);
83      return true;
84    }
85    p += namesz + descsz;
86  }
87  return false;
88}
89
90bool GetBuildIdFromNoteFile(const std::string& filename, BuildId* build_id) {
91  std::string content;
92  if (!android::base::ReadFileToString(filename, &content)) {
93    LOG(DEBUG) << "can't read note file " << filename;
94    return false;
95  }
96  if (GetBuildIdFromNoteSection(content.c_str(), content.size(), build_id) == false) {
97    LOG(DEBUG) << "can't read build_id from note file " << filename;
98    return false;
99  }
100  return true;
101}
102
103template <class ELFT>
104bool GetBuildIdFromELFFile(const llvm::object::ELFFile<ELFT>* elf, BuildId* build_id) {
105  for (auto section_iterator = elf->section_begin(); section_iterator != elf->section_end();
106       ++section_iterator) {
107    if (section_iterator->sh_type == llvm::ELF::SHT_NOTE) {
108      auto contents = elf->getSectionContents(&*section_iterator);
109      if (contents.getError()) {
110        LOG(DEBUG) << "read note section error";
111        continue;
112      }
113      if (GetBuildIdFromNoteSection(reinterpret_cast<const char*>(contents->data()),
114                                    contents->size(), build_id)) {
115        return true;
116      }
117    }
118  }
119  return false;
120}
121
122static bool GetBuildIdFromObjectFile(llvm::object::ObjectFile* obj, BuildId* build_id) {
123  bool result = false;
124  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(obj)) {
125    result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
126  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(obj)) {
127    result = GetBuildIdFromELFFile(elf->getELFFile(), build_id);
128  } else {
129    LOG(ERROR) << "unknown elf format in file " << obj->getFileName().data();
130    return false;
131  }
132  if (!result) {
133    LOG(DEBUG) << "no build id present in file " << obj->getFileName().data();
134  }
135  return result;
136}
137
138struct BinaryRet {
139  llvm::object::OwningBinary<llvm::object::Binary> binary;
140  llvm::object::ObjectFile* obj;
141
142  BinaryRet() : obj(nullptr) {
143  }
144};
145
146static BinaryRet OpenObjectFile(const std::string& filename, uint64_t file_offset = 0,
147                                uint64_t file_size = 0) {
148  BinaryRet ret;
149  FileHelper fhelper = FileHelper::OpenReadOnly(filename);
150  if (!fhelper) {
151    PLOG(DEBUG) << "failed to open " << filename;
152    return ret;
153  }
154  if (file_size == 0) {
155    file_size = GetFileSize(filename);
156    if (file_size == 0) {
157      PLOG(ERROR) << "failed to get size of file " << filename;
158      return ret;
159    }
160  }
161  auto buffer_or_err = llvm::MemoryBuffer::getOpenFileSlice(fhelper.fd(), filename, file_size, file_offset);
162  if (!buffer_or_err) {
163    LOG(ERROR) << "failed to read " << filename << " [" << file_offset << "-" << (file_offset + file_size)
164        << "]: " << buffer_or_err.getError().message();
165    return ret;
166  }
167  auto binary_or_err = llvm::object::createBinary(buffer_or_err.get()->getMemBufferRef());
168  if (!binary_or_err) {
169    LOG(ERROR) << filename << " [" << file_offset << "-" << (file_offset + file_size)
170        << "] is not a binary file: " << binary_or_err.getError().message();
171    return ret;
172  }
173  ret.binary = llvm::object::OwningBinary<llvm::object::Binary>(std::move(binary_or_err.get()),
174                                                                std::move(buffer_or_err.get()));
175  ret.obj = llvm::dyn_cast<llvm::object::ObjectFile>(ret.binary.getBinary());
176  if (ret.obj == nullptr) {
177    LOG(ERROR) << filename << " [" << file_offset << "-" << (file_offset + file_size)
178        << "] is not an object file";
179  }
180  return ret;
181}
182
183bool GetBuildIdFromElfFile(const std::string& filename, BuildId* build_id) {
184  if (!IsValidElfPath(filename)) {
185    return false;
186  }
187  bool result = GetBuildIdFromEmbeddedElfFile(filename, 0, 0, build_id);
188  LOG(VERBOSE) << "GetBuildIdFromElfFile(" << filename << ") => " << build_id->ToString();
189  return result;
190}
191
192bool GetBuildIdFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
193                                   uint32_t file_size, BuildId* build_id) {
194  BinaryRet ret = OpenObjectFile(filename, file_offset, file_size);
195  if (ret.obj == nullptr) {
196    return false;
197  }
198  return GetBuildIdFromObjectFile(ret.obj, build_id);
199}
200
// Returns true if |name| is an arm mapping symbol.
//
// Mapping symbols in arm are described in "ELF for ARM Architecture" and
// "ELF for ARM 64-bit Architecture". The regular expression to match a mapping
// symbol is ^\$(a|d|t|x)(\..*)?$
//
// |name| must be a non-null, NUL-terminated string.
bool IsArmMappingSymbol(const char* name) {
  // strchr() also matches the terminating '\0' of "adtx", so a bare "$" must be
  // rejected explicitly; otherwise name[2] would be read past the end of |name|.
  if (name[0] != '$' || name[1] == '\0') {
    return false;
  }
  return strchr("adtx", name[1]) != nullptr && (name[2] == '\0' || name[2] == '.');
}
207
208template <class ELFT>
209void ParseSymbolsFromELFFile(const llvm::object::ELFObjectFile<ELFT>* elf_obj,
210                             std::function<void(const ElfFileSymbol&)> callback) {
211  auto elf = elf_obj->getELFFile();
212  bool is_arm = (elf->getHeader()->e_machine == llvm::ELF::EM_ARM ||
213                 elf->getHeader()->e_machine == llvm::ELF::EM_AARCH64);
214  auto begin = elf_obj->symbol_begin();
215  auto end = elf_obj->symbol_end();
216  if (begin == end) {
217    begin = elf_obj->dynamic_symbol_begin();
218    end = elf_obj->dynamic_symbol_end();
219  }
220  for (; begin != end; ++begin) {
221    ElfFileSymbol symbol;
222    auto elf_symbol = static_cast<const llvm::object::ELFSymbolRef*>(&*begin);
223    auto section_it = elf_symbol->getSection();
224    if (!section_it) {
225      continue;
226    }
227    llvm::StringRef section_name;
228    if (section_it.get()->getName(section_name) || section_name.empty()) {
229      continue;
230    }
231    if (section_name.str() == ".text") {
232      symbol.is_in_text_section = true;
233    }
234
235    auto symbol_name = elf_symbol->getName();
236    if (!symbol_name || symbol_name.get().empty()) {
237      continue;
238    }
239    symbol.name = symbol_name.get();
240    symbol.vaddr = elf_symbol->getValue();
241    if ((symbol.vaddr & 1) != 0 && is_arm) {
242      // Arm sets bit 0 to mark it as thumb code, remove the flag.
243      symbol.vaddr &= ~1;
244    }
245    symbol.len = elf_symbol->getSize();
246    int type = elf_symbol->getELFType();
247    if (type == llvm::ELF::STT_FUNC) {
248      symbol.is_func = true;
249    } else if (type == llvm::ELF::STT_NOTYPE) {
250      if (symbol.is_in_text_section) {
251        symbol.is_label = true;
252        if (is_arm) {
253          // Remove mapping symbols in arm.
254          const char* p = (symbol.name.compare(0, linker_prefix.size(), linker_prefix) == 0)
255                              ? symbol.name.c_str() + linker_prefix.size()
256                              : symbol.name.c_str();
257          if (IsArmMappingSymbol(p)) {
258            symbol.is_label = false;
259          }
260        }
261      }
262    }
263
264    callback(symbol);
265  }
266}
267
268bool MatchBuildId(llvm::object::ObjectFile* obj, const BuildId& expected_build_id,
269                  const std::string& debug_filename) {
270  if (expected_build_id.IsEmpty()) {
271    return true;
272  }
273  BuildId real_build_id;
274  if (!GetBuildIdFromObjectFile(obj, &real_build_id)) {
275    return false;
276  }
277  if (expected_build_id != real_build_id) {
278    LOG(DEBUG) << "build id for " << debug_filename << " mismatch: "
279               << "expected " << expected_build_id.ToString()
280               << ", real " << real_build_id.ToString();
281    return false;
282  }
283  return true;
284}
285
286bool ParseSymbolsFromElfFile(const std::string& filename, const BuildId& expected_build_id,
287                             std::function<void(const ElfFileSymbol&)> callback) {
288  if (!IsValidElfPath(filename)) {
289    return false;
290  }
291  return ParseSymbolsFromEmbeddedElfFile(filename, 0, 0, expected_build_id, callback);
292}
293
294bool ParseSymbolsFromEmbeddedElfFile(const std::string& filename, uint64_t file_offset,
295                                     uint32_t file_size, const BuildId& expected_build_id,
296                                     std::function<void(const ElfFileSymbol&)> callback) {
297  BinaryRet ret = OpenObjectFile(filename, file_offset, file_size);
298  if (ret.obj == nullptr || !MatchBuildId(ret.obj, expected_build_id, filename)) {
299    return false;
300  }
301  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
302    ParseSymbolsFromELFFile(elf, callback);
303  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
304    ParseSymbolsFromELFFile(elf, callback);
305  } else {
306    LOG(ERROR) << "unknown elf format in file " << filename;
307    return false;
308  }
309  return true;
310}
311
312template <class ELFT>
313bool ReadMinExecutableVirtualAddress(const llvm::object::ELFFile<ELFT>* elf, uint64_t* p_vaddr) {
314  bool has_vaddr = false;
315  uint64_t min_addr = std::numeric_limits<uint64_t>::max();
316  for (auto it = elf->program_header_begin(); it != elf->program_header_end(); ++it) {
317    if ((it->p_type == llvm::ELF::PT_LOAD) && (it->p_flags & llvm::ELF::PF_X)) {
318      if (it->p_vaddr < min_addr) {
319        min_addr = it->p_vaddr;
320        has_vaddr = true;
321      }
322    }
323  }
324  if (has_vaddr) {
325    *p_vaddr = min_addr;
326  }
327  return has_vaddr;
328}
329
330bool ReadMinExecutableVirtualAddressFromElfFile(const std::string& filename,
331                                                const BuildId& expected_build_id,
332                                                uint64_t* min_vaddr) {
333  if (!IsValidElfPath(filename)) {
334    return false;
335  }
336  BinaryRet ret = OpenObjectFile(filename);
337  if (ret.obj == nullptr || !MatchBuildId(ret.obj, expected_build_id, filename)) {
338    return false;
339  }
340
341  bool result = false;
342  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
343    result = ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
344  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
345    result = ReadMinExecutableVirtualAddress(elf->getELFFile(), min_vaddr);
346  } else {
347    LOG(ERROR) << "unknown elf format in file" << filename;
348    return false;
349  }
350
351  if (!result) {
352    LOG(ERROR) << "no program header in file " << filename;
353  }
354  return result;
355}
356
357template <class ELFT>
358bool ReadSectionFromELFFile(const llvm::object::ELFFile<ELFT>* elf, const std::string& section_name,
359                            std::string* content) {
360  for (auto it = elf->section_begin(); it != elf->section_end(); ++it) {
361    auto name_or_err = elf->getSectionName(&*it);
362    if (name_or_err && *name_or_err == section_name) {
363      auto data_or_err = elf->getSectionContents(&*it);
364      if (!data_or_err) {
365        LOG(ERROR) << "failed to read section " << section_name;
366        return false;
367      }
368      content->append(data_or_err->begin(), data_or_err->end());
369      return true;
370    }
371  }
372  LOG(ERROR) << "can't find section " << section_name;
373  return false;
374}
375
376bool ReadSectionFromElfFile(const std::string& filename, const std::string& section_name,
377                            std::string* content) {
378  if (!IsValidElfPath(filename)) {
379    return false;
380  }
381  BinaryRet ret = OpenObjectFile(filename);
382  if (ret.obj == nullptr) {
383    return false;
384  }
385  bool result = false;
386  if (auto elf = llvm::dyn_cast<llvm::object::ELF32LEObjectFile>(ret.obj)) {
387    result = ReadSectionFromELFFile(elf->getELFFile(), section_name, content);
388  } else if (auto elf = llvm::dyn_cast<llvm::object::ELF64LEObjectFile>(ret.obj)) {
389    result = ReadSectionFromELFFile(elf->getELFFile(), section_name, content);
390  } else {
391    LOG(ERROR) << "unknown elf format in file" << filename;
392    return false;
393  }
394  return result;
395}
396