1//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation for LLVM symbolization library.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/DebugInfo/Symbolize/Symbolize.h"
15
16#include "SymbolizableObjectFile.h"
17
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/Config/config.h"
20#include "llvm/DebugInfo/DWARF/DWARFContext.h"
21#include "llvm/DebugInfo/PDB/PDB.h"
22#include "llvm/DebugInfo/PDB/PDBContext.h"
23#include "llvm/Object/COFF.h"
24#include "llvm/Object/ELFObjectFile.h"
25#include "llvm/Object/MachO.h"
26#include "llvm/Object/MachOUniversal.h"
27#include "llvm/Support/COFF.h"
28#include "llvm/Support/Casting.h"
29#include "llvm/Support/Compression.h"
30#include "llvm/Support/DataExtractor.h"
31#include "llvm/Support/Errc.h"
32#include "llvm/Support/FileSystem.h"
33#include "llvm/Support/MemoryBuffer.h"
34#include "llvm/Support/Path.h"
35#include <algorithm>
36#include <cassert>
37#include <cstdlib>
38#include <cstring>
39
40#if defined(_MSC_VER)
41#include <Windows.h>
42#include <DbgHelp.h>
43#pragma comment(lib, "dbghelp.lib")
44
45// Windows.h conflicts with our COFF header definitions.
46#ifdef IMAGE_FILE_MACHINE_I386
47#undef IMAGE_FILE_MACHINE_I386
48#endif
49#endif
50
51namespace llvm {
52namespace symbolize {
53
54Expected<DILineInfo> LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
55                                                  uint64_t ModuleOffset) {
56  SymbolizableModule *Info;
57  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
58    Info = InfoOrErr.get();
59  else
60    return InfoOrErr.takeError();
61
62  // A null module means an error has already been reported. Return an empty
63  // result.
64  if (!Info)
65    return DILineInfo();
66
67  // If the user is giving us relative addresses, add the preferred base of the
68  // object to the offset before we do the query. It's what DIContext expects.
69  if (Opts.RelativeAddresses)
70    ModuleOffset += Info->getModulePreferredBase();
71
72  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
73                                            Opts.UseSymbolTable);
74  if (Opts.Demangle)
75    LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
76  return LineInfo;
77}
78
79Expected<DIInliningInfo>
80LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
81                                     uint64_t ModuleOffset) {
82  SymbolizableModule *Info;
83  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
84    Info = InfoOrErr.get();
85  else
86    return InfoOrErr.takeError();
87
88  // A null module means an error has already been reported. Return an empty
89  // result.
90  if (!Info)
91    return DIInliningInfo();
92
93  // If the user is giving us relative addresses, add the preferred base of the
94  // object to the offset before we do the query. It's what DIContext expects.
95  if (Opts.RelativeAddresses)
96    ModuleOffset += Info->getModulePreferredBase();
97
98  DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
99      ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
100  if (Opts.Demangle) {
101    for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
102      auto *Frame = InlinedContext.getMutableFrame(i);
103      Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
104    }
105  }
106  return InlinedContext;
107}
108
109Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
110                                                 uint64_t ModuleOffset) {
111  SymbolizableModule *Info;
112  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
113    Info = InfoOrErr.get();
114  else
115    return InfoOrErr.takeError();
116
117  // A null module means an error has already been reported. Return an empty
118  // result.
119  if (!Info)
120    return DIGlobal();
121
122  // If the user is giving us relative addresses, add the preferred base of
123  // the object to the offset before we do the query. It's what DIContext
124  // expects.
125  if (Opts.RelativeAddresses)
126    ModuleOffset += Info->getModulePreferredBase();
127
128  DIGlobal Global = Info->symbolizeData(ModuleOffset);
129  if (Opts.Demangle)
130    Global.Name = DemangleName(Global.Name, Info);
131  return Global;
132}
133
// Empties every cache the symbolizer keeps: the per-(path, arch) objects
// extracted from universal binaries, the loaded binaries, the cached
// (object, debug object) pairs and the symbolizable modules. Subsequent
// queries reload whatever they need from disk.
void LLVMSymbolizer::flush() {
  ObjectForUBPathAndArch.clear();
  BinaryForPath.clear();
  ObjectPairForPathArch.clear();
  Modules.clear();
}
140
141namespace {
142
143// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
144// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
145// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
146// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
147std::string getDarwinDWARFResourceForPath(
148    const std::string &Path, const std::string &Basename) {
149  SmallString<16> ResourceName = StringRef(Path);
150  if (sys::path::extension(Path) != ".dSYM") {
151    ResourceName += ".dSYM";
152  }
153  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
154  sys::path::append(ResourceName, Basename);
155  return ResourceName.str();
156}
157
158bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
159  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
160      MemoryBuffer::getFileOrSTDIN(Path);
161  if (!MB)
162    return false;
163  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
164}
165
166bool findDebugBinary(const std::string &OrigPath,
167                     const std::string &DebuglinkName, uint32_t CRCHash,
168                     std::string &Result) {
169  std::string OrigRealPath = OrigPath;
170#if defined(HAVE_REALPATH)
171  if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
172    OrigRealPath = RP;
173    free(RP);
174  }
175#endif
176  SmallString<16> OrigDir(OrigRealPath);
177  llvm::sys::path::remove_filename(OrigDir);
178  SmallString<16> DebugPath = OrigDir;
179  // Try /path/to/original_binary/debuglink_name
180  llvm::sys::path::append(DebugPath, DebuglinkName);
181  if (checkFileCRC(DebugPath, CRCHash)) {
182    Result = DebugPath.str();
183    return true;
184  }
185  // Try /path/to/original_binary/.debug/debuglink_name
186  DebugPath = OrigRealPath;
187  llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
188  if (checkFileCRC(DebugPath, CRCHash)) {
189    Result = DebugPath.str();
190    return true;
191  }
192  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
193  DebugPath = "/usr/lib/debug";
194  llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
195                          DebuglinkName);
196  if (checkFileCRC(DebugPath, CRCHash)) {
197    Result = DebugPath.str();
198    return true;
199  }
200  return false;
201}
202
203bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
204                             uint32_t &CRCHash) {
205  if (!Obj)
206    return false;
207  for (const SectionRef &Section : Obj->sections()) {
208    StringRef Name;
209    Section.getName(Name);
210    Name = Name.substr(Name.find_first_not_of("._"));
211    if (Name == "gnu_debuglink") {
212      StringRef Data;
213      Section.getContents(Data);
214      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
215      uint32_t Offset = 0;
216      if (const char *DebugNameStr = DE.getCStr(&Offset)) {
217        // 4-byte align the offset.
218        Offset = (Offset + 3) & ~0x3;
219        if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
220          DebugName = DebugNameStr;
221          CRCHash = DE.getU32(&Offset);
222          return true;
223        }
224      }
225      break;
226    }
227  }
228  return false;
229}
230
231bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
232                             const MachOObjectFile *Obj) {
233  ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
234  ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
235  if (dbg_uuid.empty() || bin_uuid.empty())
236    return false;
237  return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
238}
239
240} // end anonymous namespace
241
242ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
243    const MachOObjectFile *MachExeObj, const std::string &ArchName) {
244  // On Darwin we may find DWARF in separate object file in
245  // resource directory.
246  std::vector<std::string> DsymPaths;
247  StringRef Filename = sys::path::filename(ExePath);
248  DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
249  for (const auto &Path : Opts.DsymHints) {
250    DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
251  }
252  for (const auto &Path : DsymPaths) {
253    auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
254    if (!DbgObjOrErr) {
255      // Ignore errors, the file might not exist.
256      consumeError(DbgObjOrErr.takeError());
257      continue;
258    }
259    ObjectFile *DbgObj = DbgObjOrErr.get();
260    if (!DbgObj)
261      continue;
262    const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
263    if (!MachDbgObj)
264      continue;
265    if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
266      return DbgObj;
267  }
268  return nullptr;
269}
270
271ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
272                                                  const ObjectFile *Obj,
273                                                  const std::string &ArchName) {
274  std::string DebuglinkName;
275  uint32_t CRCHash;
276  std::string DebugBinaryPath;
277  if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
278    return nullptr;
279  if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
280    return nullptr;
281  auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
282  if (!DbgObjOrErr) {
283    // Ignore errors, the file might not exist.
284    consumeError(DbgObjOrErr.takeError());
285    return nullptr;
286  }
287  return DbgObjOrErr.get();
288}
289
290Expected<LLVMSymbolizer::ObjectPair>
291LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
292                                      const std::string &ArchName) {
293  const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
294  if (I != ObjectPairForPathArch.end()) {
295    return I->second;
296  }
297
298  auto ObjOrErr = getOrCreateObject(Path, ArchName);
299  if (!ObjOrErr) {
300    ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName),
301                                                ObjectPair(nullptr, nullptr)));
302    return ObjOrErr.takeError();
303  }
304
305  ObjectFile *Obj = ObjOrErr.get();
306  assert(Obj != nullptr);
307  ObjectFile *DbgObj = nullptr;
308
309  if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
310    DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
311  if (!DbgObj)
312    DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
313  if (!DbgObj)
314    DbgObj = Obj;
315  ObjectPair Res = std::make_pair(Obj, DbgObj);
316  ObjectPairForPathArch.insert(
317      std::make_pair(std::make_pair(Path, ArchName), Res));
318  return Res;
319}
320
321Expected<ObjectFile *>
322LLVMSymbolizer::getOrCreateObject(const std::string &Path,
323                                  const std::string &ArchName) {
324  const auto &I = BinaryForPath.find(Path);
325  Binary *Bin = nullptr;
326  if (I == BinaryForPath.end()) {
327    Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
328    if (!BinOrErr) {
329      BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>()));
330      return BinOrErr.takeError();
331    }
332    Bin = BinOrErr->getBinary();
333    BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
334  } else {
335    Bin = I->second.getBinary();
336  }
337
338  if (!Bin)
339    return static_cast<ObjectFile *>(nullptr);
340
341  if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
342    const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
343    if (I != ObjectForUBPathAndArch.end()) {
344      return I->second.get();
345    }
346    Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
347        UB->getObjectForArch(ArchName);
348    if (!ObjOrErr) {
349      ObjectForUBPathAndArch.insert(std::make_pair(
350          std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>()));
351      return ObjOrErr.takeError();
352    }
353    ObjectFile *Res = ObjOrErr->get();
354    ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
355                                                 std::move(ObjOrErr.get())));
356    return Res;
357  }
358  if (Bin->isObject()) {
359    return cast<ObjectFile>(Bin);
360  }
361  return errorCodeToError(object_error::arch_not_found);
362}
363
364Expected<SymbolizableModule *>
365LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
366  const auto &I = Modules.find(ModuleName);
367  if (I != Modules.end()) {
368    return I->second.get();
369  }
370  std::string BinaryName = ModuleName;
371  std::string ArchName = Opts.DefaultArch;
372  size_t ColonPos = ModuleName.find_last_of(':');
373  // Verify that substring after colon form a valid arch name.
374  if (ColonPos != std::string::npos) {
375    std::string ArchStr = ModuleName.substr(ColonPos + 1);
376    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
377      BinaryName = ModuleName.substr(0, ColonPos);
378      ArchName = ArchStr;
379    }
380  }
381  auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
382  if (!ObjectsOrErr) {
383    // Failed to find valid object file.
384    Modules.insert(
385        std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
386    return ObjectsOrErr.takeError();
387  }
388  ObjectPair Objects = ObjectsOrErr.get();
389
390  std::unique_ptr<DIContext> Context;
391  // If this is a COFF object containing PDB info, use a PDBContext to
392  // symbolize. Otherwise, use DWARF.
393  if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
394    const debug_pdb_info *PDBInfo;
395    StringRef PDBFileName;
396    auto EC = CoffObject->getDebugPDBInfo(PDBInfo, PDBFileName);
397    if (!EC && PDBInfo != nullptr) {
398      using namespace pdb;
399      std::unique_ptr<IPDBSession> Session;
400      if (auto Err = loadDataForEXE(PDB_ReaderType::DIA,
401                                    Objects.first->getFileName(), Session)) {
402        Modules.insert(
403            std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
404        return std::move(Err);
405      }
406      Context.reset(new PDBContext(*CoffObject, std::move(Session)));
407    }
408  }
409  if (!Context)
410    Context.reset(new DWARFContextInMemory(*Objects.second));
411  assert(Context);
412  auto InfoOrErr =
413      SymbolizableObjectFile::create(Objects.first, std::move(Context));
414  std::unique_ptr<SymbolizableModule> SymMod;
415  if (InfoOrErr)
416    SymMod = std::move(InfoOrErr.get());
417  auto InsertResult =
418      Modules.insert(std::make_pair(ModuleName, std::move(SymMod)));
419  assert(InsertResult.second);
420  if (auto EC = InfoOrErr.getError())
421    return errorCodeToError(EC);
422  return InsertResult.first->second.get();
423}
424
425namespace {
426
427// Undo these various manglings for Win32 extern "C" functions:
428// cdecl       - _foo
429// stdcall     - _foo@12
430// fastcall    - @foo@12
431// vectorcall  - foo@@12
432// These are all different linkage names for 'foo'.
433StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
434  // Remove any '_' or '@' prefix.
435  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
436  if (Front == '_' || Front == '@')
437    SymbolName = SymbolName.drop_front();
438
439  // Remove any '@[0-9]+' suffix.
440  if (Front != '?') {
441    size_t AtPos = SymbolName.rfind('@');
442    if (AtPos != StringRef::npos &&
443        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
444                    [](char C) { return C >= '0' && C <= '9'; })) {
445      SymbolName = SymbolName.substr(0, AtPos);
446    }
447  }
448
449  // Remove any ending '@' for vectorcall.
450  if (SymbolName.endswith("@"))
451    SymbolName = SymbolName.drop_back();
452
453  return SymbolName;
454}
455
456} // end anonymous namespace
457
458#if !defined(_MSC_VER)
459// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
460extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
461                                size_t *length, int *status);
462#endif
463
464std::string LLVMSymbolizer::DemangleName(const std::string &Name,
465                                         const SymbolizableModule *ModInfo) {
466#if !defined(_MSC_VER)
467  // We can spoil names of symbols with C linkage, so use an heuristic
468  // approach to check if the name should be demangled.
469  if (Name.substr(0, 2) == "_Z") {
470    int status = 0;
471    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
472    if (status != 0)
473      return Name;
474    std::string Result = DemangledName;
475    free(DemangledName);
476    return Result;
477  }
478#else
479  if (!Name.empty() && Name.front() == '?') {
480    // Only do MSVC C++ demangling on symbols starting with '?'.
481    char DemangledName[1024] = {0};
482    DWORD result = ::UnDecorateSymbolName(
483        Name.c_str(), DemangledName, 1023,
484        UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
485            UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
486            UNDNAME_NO_THROW_SIGNATURES |    // Strip throw() specifications
487            UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
488            UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
489            UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
490    return (result == 0) ? Name : std::string(DemangledName);
491  }
492#endif
493  if (ModInfo && ModInfo->isWin32Module())
494    return std::string(demanglePE32ExternCFunc(Name));
495  return Name;
496}
497
498} // namespace symbolize
499} // namespace llvm
500