1//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation for LLVM symbolization library.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLVMSymbolize.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/Config/config.h"
17#include "llvm/Object/ELFObjectFile.h"
18#include "llvm/Object/MachO.h"
19#include "llvm/Support/Casting.h"
20#include "llvm/Support/Compression.h"
21#include "llvm/Support/DataExtractor.h"
22#include "llvm/Support/Errc.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/Path.h"
26#include <sstream>
27#include <stdlib.h>
28
29namespace llvm {
30namespace symbolize {
31
32static bool error(std::error_code ec) {
33  if (!ec)
34    return false;
35  errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
36  return true;
37}
38
39static DILineInfoSpecifier
40getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) {
41  return DILineInfoSpecifier(
42      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
43      Opts.PrintFunctions);
44}
45
46ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
47    : Module(Obj), DebugInfoContext(DICtx) {
48  for (const SymbolRef &Symbol : Module->symbols()) {
49    addSymbol(Symbol);
50  }
51  bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
52  if (NoSymbolTable && Module->isELF()) {
53    // Fallback to dynamic symbol table, if regular symbol table is stripped.
54    std::pair<symbol_iterator, symbol_iterator> IDyn =
55        getELFDynamicSymbolIterators(Module);
56    for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
57      addSymbol(*si);
58    }
59  }
60}
61
62void ModuleInfo::addSymbol(const SymbolRef &Symbol) {
63  SymbolRef::Type SymbolType;
64  if (error(Symbol.getType(SymbolType)))
65    return;
66  if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
67    return;
68  uint64_t SymbolAddress;
69  if (error(Symbol.getAddress(SymbolAddress)) ||
70      SymbolAddress == UnknownAddressOrSize)
71    return;
72  uint64_t SymbolSize;
73  // Getting symbol size is linear for Mach-O files, so assume that symbol
74  // occupies the memory range up to the following symbol.
75  if (isa<MachOObjectFile>(Module))
76    SymbolSize = 0;
77  else if (error(Symbol.getSize(SymbolSize)) ||
78           SymbolSize == UnknownAddressOrSize)
79    return;
80  StringRef SymbolName;
81  if (error(Symbol.getName(SymbolName)))
82    return;
83  // Mach-O symbol table names have leading underscore, skip it.
84  if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
85    SymbolName = SymbolName.drop_front();
86  // FIXME: If a function has alias, there are two entries in symbol table
87  // with same address size. Make sure we choose the correct one.
88  SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
89  SymbolDesc SD = { SymbolAddress, SymbolSize };
90  M.insert(std::make_pair(SD, SymbolName));
91}
92
93bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
94                                        std::string &Name, uint64_t &Addr,
95                                        uint64_t &Size) const {
96  const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects;
97  if (M.empty())
98    return false;
99  SymbolDesc SD = { Address, Address };
100  SymbolMapTy::const_iterator it = M.upper_bound(SD);
101  if (it == M.begin())
102    return false;
103  --it;
104  if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address)
105    return false;
106  Name = it->second.str();
107  Addr = it->first.Addr;
108  Size = it->first.Size;
109  return true;
110}
111
112DILineInfo ModuleInfo::symbolizeCode(
113    uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
114  DILineInfo LineInfo;
115  if (DebugInfoContext) {
116    LineInfo = DebugInfoContext->getLineInfoForAddress(
117        ModuleOffset, getDILineInfoSpecifier(Opts));
118  }
119  // Override function name from symbol table if necessary.
120  if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) {
121    std::string FunctionName;
122    uint64_t Start, Size;
123    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
124                               FunctionName, Start, Size)) {
125      LineInfo.FunctionName = FunctionName;
126    }
127  }
128  return LineInfo;
129}
130
131DIInliningInfo ModuleInfo::symbolizeInlinedCode(
132    uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
133  DIInliningInfo InlinedContext;
134  if (DebugInfoContext) {
135    InlinedContext = DebugInfoContext->getInliningInfoForAddress(
136        ModuleOffset, getDILineInfoSpecifier(Opts));
137  }
138  // Make sure there is at least one frame in context.
139  if (InlinedContext.getNumberOfFrames() == 0) {
140    InlinedContext.addFrame(DILineInfo());
141  }
142  // Override the function name in lower frame with name from symbol table.
143  if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) {
144    DIInliningInfo PatchedInlinedContext;
145    for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
146      DILineInfo LineInfo = InlinedContext.getFrame(i);
147      if (i == n - 1) {
148        std::string FunctionName;
149        uint64_t Start, Size;
150        if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
151                                   FunctionName, Start, Size)) {
152          LineInfo.FunctionName = FunctionName;
153        }
154      }
155      PatchedInlinedContext.addFrame(LineInfo);
156    }
157    InlinedContext = PatchedInlinedContext;
158  }
159  return InlinedContext;
160}
161
162bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name,
163                               uint64_t &Start, uint64_t &Size) const {
164  return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start,
165                                Size);
166}
167
// Placeholder printed in place of a symbol, function or file name that could
// not be determined.
const char LLVMSymbolizer::kBadString[] = "??";
169
170std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
171                                          uint64_t ModuleOffset) {
172  ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
173  if (!Info)
174    return printDILineInfo(DILineInfo());
175  if (Opts.PrintInlining) {
176    DIInliningInfo InlinedContext =
177        Info->symbolizeInlinedCode(ModuleOffset, Opts);
178    uint32_t FramesNum = InlinedContext.getNumberOfFrames();
179    assert(FramesNum > 0);
180    std::string Result;
181    for (uint32_t i = 0; i < FramesNum; i++) {
182      DILineInfo LineInfo = InlinedContext.getFrame(i);
183      Result += printDILineInfo(LineInfo);
184    }
185    return Result;
186  }
187  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
188  return printDILineInfo(LineInfo);
189}
190
191std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
192                                          uint64_t ModuleOffset) {
193  std::string Name = kBadString;
194  uint64_t Start = 0;
195  uint64_t Size = 0;
196  if (Opts.UseSymbolTable) {
197    if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
198      if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
199        Name = DemangleName(Name);
200    }
201  }
202  std::stringstream ss;
203  ss << Name << "\n" << Start << " " << Size << "\n";
204  return ss.str();
205}
206
207void LLVMSymbolizer::flush() {
208  DeleteContainerSeconds(Modules);
209  BinaryForPath.clear();
210  ObjectFileForArch.clear();
211}
212
213static std::string getDarwinDWARFResourceForPath(const std::string &Path) {
214  StringRef Basename = sys::path::filename(Path);
215  const std::string &DSymDirectory = Path + ".dSYM";
216  SmallString<16> ResourceName = StringRef(DSymDirectory);
217  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
218  sys::path::append(ResourceName, Basename);
219  return ResourceName.str();
220}
221
222static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
223  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
224      MemoryBuffer::getFileOrSTDIN(Path);
225  if (!MB)
226    return false;
227  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
228}
229
230static bool findDebugBinary(const std::string &OrigPath,
231                            const std::string &DebuglinkName, uint32_t CRCHash,
232                            std::string &Result) {
233  std::string OrigRealPath = OrigPath;
234#if defined(HAVE_REALPATH)
235  if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
236    OrigRealPath = RP;
237    free(RP);
238  }
239#endif
240  SmallString<16> OrigDir(OrigRealPath);
241  llvm::sys::path::remove_filename(OrigDir);
242  SmallString<16> DebugPath = OrigDir;
243  // Try /path/to/original_binary/debuglink_name
244  llvm::sys::path::append(DebugPath, DebuglinkName);
245  if (checkFileCRC(DebugPath, CRCHash)) {
246    Result = DebugPath.str();
247    return true;
248  }
249  // Try /path/to/original_binary/.debug/debuglink_name
250  DebugPath = OrigRealPath;
251  llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
252  if (checkFileCRC(DebugPath, CRCHash)) {
253    Result = DebugPath.str();
254    return true;
255  }
256  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
257  DebugPath = "/usr/lib/debug";
258  llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
259                          DebuglinkName);
260  if (checkFileCRC(DebugPath, CRCHash)) {
261    Result = DebugPath.str();
262    return true;
263  }
264  return false;
265}
266
267static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName,
268                                    uint32_t &CRCHash) {
269  const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin);
270  if (!Obj)
271    return false;
272  for (const SectionRef &Section : Obj->sections()) {
273    StringRef Name;
274    Section.getName(Name);
275    Name = Name.substr(Name.find_first_not_of("._"));
276    if (Name == "gnu_debuglink") {
277      StringRef Data;
278      Section.getContents(Data);
279      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
280      uint32_t Offset = 0;
281      if (const char *DebugNameStr = DE.getCStr(&Offset)) {
282        // 4-byte align the offset.
283        Offset = (Offset + 3) & ~0x3;
284        if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
285          DebugName = DebugNameStr;
286          CRCHash = DE.getU32(&Offset);
287          return true;
288        }
289      }
290      break;
291    }
292  }
293  return false;
294}
295
296LLVMSymbolizer::BinaryPair
297LLVMSymbolizer::getOrCreateBinary(const std::string &Path) {
298  BinaryMapTy::iterator I = BinaryForPath.find(Path);
299  if (I != BinaryForPath.end())
300    return I->second;
301  Binary *Bin = nullptr;
302  Binary *DbgBin = nullptr;
303  ErrorOr<Binary *> BinaryOrErr = createBinary(Path);
304  if (!error(BinaryOrErr.getError())) {
305    std::unique_ptr<Binary> ParsedBinary(BinaryOrErr.get());
306    // Check if it's a universal binary.
307    Bin = ParsedBinary.get();
308    ParsedBinariesAndObjects.push_back(std::move(ParsedBinary));
309    if (Bin->isMachO() || Bin->isMachOUniversalBinary()) {
310      // On Darwin we may find DWARF in separate object file in
311      // resource directory.
312      const std::string &ResourcePath =
313          getDarwinDWARFResourceForPath(Path);
314      BinaryOrErr = createBinary(ResourcePath);
315      std::error_code EC = BinaryOrErr.getError();
316      if (EC != errc::no_such_file_or_directory && !error(EC)) {
317        DbgBin = BinaryOrErr.get();
318        ParsedBinariesAndObjects.push_back(std::unique_ptr<Binary>(DbgBin));
319      }
320    }
321    // Try to locate the debug binary using .gnu_debuglink section.
322    if (!DbgBin) {
323      std::string DebuglinkName;
324      uint32_t CRCHash;
325      std::string DebugBinaryPath;
326      if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) &&
327          findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) {
328        BinaryOrErr = createBinary(DebugBinaryPath);
329        if (!error(BinaryOrErr.getError())) {
330          DbgBin = BinaryOrErr.get();
331          ParsedBinariesAndObjects.push_back(std::unique_ptr<Binary>(DbgBin));
332        }
333      }
334    }
335  }
336  if (!DbgBin)
337    DbgBin = Bin;
338  BinaryPair Res = std::make_pair(Bin, DbgBin);
339  BinaryForPath[Path] = Res;
340  return Res;
341}
342
343ObjectFile *
344LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) {
345  if (!Bin)
346    return nullptr;
347  ObjectFile *Res = nullptr;
348  if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
349    ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find(
350        std::make_pair(UB, ArchName));
351    if (I != ObjectFileForArch.end())
352      return I->second;
353    ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
354        UB->getObjectForArch(Triple(ArchName).getArch());
355    if (ParsedObj) {
356      Res = ParsedObj.get().get();
357      ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get()));
358    }
359    ObjectFileForArch[std::make_pair(UB, ArchName)] = Res;
360  } else if (Bin->isObject()) {
361    Res = cast<ObjectFile>(Bin);
362  }
363  return Res;
364}
365
366ModuleInfo *
367LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
368  ModuleMapTy::iterator I = Modules.find(ModuleName);
369  if (I != Modules.end())
370    return I->second;
371  std::string BinaryName = ModuleName;
372  std::string ArchName = Opts.DefaultArch;
373  size_t ColonPos = ModuleName.find_last_of(':');
374  // Verify that substring after colon form a valid arch name.
375  if (ColonPos != std::string::npos) {
376    std::string ArchStr = ModuleName.substr(ColonPos + 1);
377    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
378      BinaryName = ModuleName.substr(0, ColonPos);
379      ArchName = ArchStr;
380    }
381  }
382  BinaryPair Binaries = getOrCreateBinary(BinaryName);
383  ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName);
384  ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName);
385
386  if (!Obj) {
387    // Failed to find valid object file.
388    Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr));
389    return nullptr;
390  }
391  DIContext *Context = DIContext::getDWARFContext(DbgObj);
392  assert(Context);
393  ModuleInfo *Info = new ModuleInfo(Obj, Context);
394  Modules.insert(make_pair(ModuleName, Info));
395  return Info;
396}
397
398std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
399  // By default, DILineInfo contains "<invalid>" for function/filename it
400  // cannot fetch. We replace it to "??" to make our output closer to addr2line.
401  static const std::string kDILineInfoBadString = "<invalid>";
402  std::stringstream Result;
403  if (Opts.PrintFunctions != FunctionNameKind::None) {
404    std::string FunctionName = LineInfo.FunctionName;
405    if (FunctionName == kDILineInfoBadString)
406      FunctionName = kBadString;
407    else if (Opts.Demangle)
408      FunctionName = DemangleName(FunctionName);
409    Result << FunctionName << "\n";
410  }
411  std::string Filename = LineInfo.FileName;
412  if (Filename == kDILineInfoBadString)
413    Filename = kBadString;
414  Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n";
415  return Result.str();
416}
417
#if !defined(_MSC_VER)
// Forward declaration of the Itanium C++ ABI demangler used by DemangleName.
// It is assumed to be provided by the C++ runtime (e.g. libcxxabi) on every
// toolchain except MSVC, where this declaration is compiled out.
extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
                                size_t *length, int *status);
#endif
423
424std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
425#if !defined(_MSC_VER)
426  // We can spoil names of symbols with C linkage, so use an heuristic
427  // approach to check if the name should be demangled.
428  if (Name.substr(0, 2) != "_Z")
429    return Name;
430  int status = 0;
431  char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
432  if (status != 0)
433    return Name;
434  std::string Result = DemangledName;
435  free(DemangledName);
436  return Result;
437#else
438  return Name;
439#endif
440}
441
442} // namespace symbolize
443} // namespace llvm
444