1//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation for LLVM symbolization library.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLVMSymbolize.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/Object/MachO.h"
17#include "llvm/Support/Casting.h"
18#include "llvm/Support/FileSystem.h"
19#include "llvm/Support/Path.h"
20
21#include <sstream>
22
23namespace llvm {
24namespace symbolize {
25
26static bool error(error_code ec) {
27  if (!ec)
28    return false;
29  errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
30  return true;
31}
32
33static uint32_t
34getDILineInfoSpecifierFlags(const LLVMSymbolizer::Options &Opts) {
35  uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo |
36                   llvm::DILineInfoSpecifier::AbsoluteFilePath;
37  if (Opts.PrintFunctions)
38    Flags |= llvm::DILineInfoSpecifier::FunctionName;
39  return Flags;
40}
41
42static void patchFunctionNameInDILineInfo(const std::string &NewFunctionName,
43                                          DILineInfo &LineInfo) {
44  std::string FileName = LineInfo.getFileName();
45  LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName),
46                        LineInfo.getLine(), LineInfo.getColumn());
47}
48
49ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
50    : Module(Obj), DebugInfoContext(DICtx) {
51  error_code ec;
52  for (symbol_iterator si = Module->begin_symbols(), se = Module->end_symbols();
53       si != se; si.increment(ec)) {
54    if (error(ec))
55      return;
56    SymbolRef::Type SymbolType;
57    if (error(si->getType(SymbolType)))
58      continue;
59    if (SymbolType != SymbolRef::ST_Function &&
60        SymbolType != SymbolRef::ST_Data)
61      continue;
62    uint64_t SymbolAddress;
63    if (error(si->getAddress(SymbolAddress)) ||
64        SymbolAddress == UnknownAddressOrSize)
65      continue;
66    uint64_t SymbolSize;
67    // Getting symbol size is linear for Mach-O files, so assume that symbol
68    // occupies the memory range up to the following symbol.
69    if (isa<MachOObjectFile>(Obj))
70      SymbolSize = 0;
71    else if (error(si->getSize(SymbolSize)) ||
72             SymbolSize == UnknownAddressOrSize)
73      continue;
74    StringRef SymbolName;
75    if (error(si->getName(SymbolName)))
76      continue;
77    // Mach-O symbol table names have leading underscore, skip it.
78    if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
79      SymbolName = SymbolName.drop_front();
80    // FIXME: If a function has alias, there are two entries in symbol table
81    // with same address size. Make sure we choose the correct one.
82    SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
83    SymbolDesc SD = { SymbolAddress, SymbolSize };
84    M.insert(std::make_pair(SD, SymbolName));
85  }
86}
87
88bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
89                                        std::string &Name, uint64_t &Addr,
90                                        uint64_t &Size) const {
91  const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects;
92  if (M.empty())
93    return false;
94  SymbolDesc SD = { Address, Address };
95  SymbolMapTy::const_iterator it = M.upper_bound(SD);
96  if (it == M.begin())
97    return false;
98  --it;
99  if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address)
100    return false;
101  Name = it->second.str();
102  Addr = it->first.Addr;
103  Size = it->first.Size;
104  return true;
105}
106
107DILineInfo ModuleInfo::symbolizeCode(
108    uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
109  DILineInfo LineInfo;
110  if (DebugInfoContext) {
111    LineInfo = DebugInfoContext->getLineInfoForAddress(
112        ModuleOffset, getDILineInfoSpecifierFlags(Opts));
113  }
114  // Override function name from symbol table if necessary.
115  if (Opts.PrintFunctions && Opts.UseSymbolTable) {
116    std::string FunctionName;
117    uint64_t Start, Size;
118    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
119                               FunctionName, Start, Size)) {
120      patchFunctionNameInDILineInfo(FunctionName, LineInfo);
121    }
122  }
123  return LineInfo;
124}
125
126DIInliningInfo ModuleInfo::symbolizeInlinedCode(
127    uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
128  DIInliningInfo InlinedContext;
129  if (DebugInfoContext) {
130    InlinedContext = DebugInfoContext->getInliningInfoForAddress(
131        ModuleOffset, getDILineInfoSpecifierFlags(Opts));
132  }
133  // Make sure there is at least one frame in context.
134  if (InlinedContext.getNumberOfFrames() == 0) {
135    InlinedContext.addFrame(DILineInfo());
136  }
137  // Override the function name in lower frame with name from symbol table.
138  if (Opts.PrintFunctions && Opts.UseSymbolTable) {
139    DIInliningInfo PatchedInlinedContext;
140    for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
141      DILineInfo LineInfo = InlinedContext.getFrame(i);
142      if (i == n - 1) {
143        std::string FunctionName;
144        uint64_t Start, Size;
145        if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
146                                   FunctionName, Start, Size)) {
147          patchFunctionNameInDILineInfo(FunctionName, LineInfo);
148        }
149      }
150      PatchedInlinedContext.addFrame(LineInfo);
151    }
152    InlinedContext = PatchedInlinedContext;
153  }
154  return InlinedContext;
155}
156
157bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name,
158                               uint64_t &Start, uint64_t &Size) const {
159  return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start,
160                                Size);
161}
162
163const char LLVMSymbolizer::kBadString[] = "??";
164
165std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
166                                          uint64_t ModuleOffset) {
167  ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
168  if (Info == 0)
169    return printDILineInfo(DILineInfo());
170  if (Opts.PrintInlining) {
171    DIInliningInfo InlinedContext =
172        Info->symbolizeInlinedCode(ModuleOffset, Opts);
173    uint32_t FramesNum = InlinedContext.getNumberOfFrames();
174    assert(FramesNum > 0);
175    std::string Result;
176    for (uint32_t i = 0; i < FramesNum; i++) {
177      DILineInfo LineInfo = InlinedContext.getFrame(i);
178      Result += printDILineInfo(LineInfo);
179    }
180    return Result;
181  }
182  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
183  return printDILineInfo(LineInfo);
184}
185
186std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
187                                          uint64_t ModuleOffset) {
188  std::string Name = kBadString;
189  uint64_t Start = 0;
190  uint64_t Size = 0;
191  if (Opts.UseSymbolTable) {
192    if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
193      if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
194        Name = DemangleName(Name);
195    }
196  }
197  std::stringstream ss;
198  ss << Name << "\n" << Start << " " << Size << "\n";
199  return ss.str();
200}
201
202void LLVMSymbolizer::flush() {
203  DeleteContainerSeconds(Modules);
204  DeleteContainerPointers(ParsedBinariesAndObjects);
205  BinaryForPath.clear();
206  ObjectFileForArch.clear();
207}
208
209static std::string getDarwinDWARFResourceForPath(const std::string &Path) {
210  StringRef Basename = sys::path::filename(Path);
211  const std::string &DSymDirectory = Path + ".dSYM";
212  SmallString<16> ResourceName = StringRef(DSymDirectory);
213  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
214  sys::path::append(ResourceName, Basename);
215  return ResourceName.str();
216}
217
218LLVMSymbolizer::BinaryPair
219LLVMSymbolizer::getOrCreateBinary(const std::string &Path) {
220  BinaryMapTy::iterator I = BinaryForPath.find(Path);
221  if (I != BinaryForPath.end())
222    return I->second;
223  Binary *Bin = 0;
224  Binary *DbgBin = 0;
225  OwningPtr<Binary> ParsedBinary;
226  OwningPtr<Binary> ParsedDbgBinary;
227  if (!error(createBinary(Path, ParsedBinary))) {
228    // Check if it's a universal binary.
229    Bin = ParsedBinary.take();
230    ParsedBinariesAndObjects.push_back(Bin);
231    if (Bin->isMachO() || Bin->isMachOUniversalBinary()) {
232      // On Darwin we may find DWARF in separate object file in
233      // resource directory.
234      const std::string &ResourcePath =
235          getDarwinDWARFResourceForPath(Path);
236      bool ResourceFileExists = false;
237      if (!sys::fs::exists(ResourcePath, ResourceFileExists) &&
238          ResourceFileExists &&
239          !error(createBinary(ResourcePath, ParsedDbgBinary))) {
240        DbgBin = ParsedDbgBinary.take();
241        ParsedBinariesAndObjects.push_back(DbgBin);
242      }
243    }
244  }
245  if (DbgBin == 0)
246    DbgBin = Bin;
247  BinaryPair Res = std::make_pair(Bin, DbgBin);
248  BinaryForPath[Path] = Res;
249  return Res;
250}
251
252ObjectFile *
253LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) {
254  if (Bin == 0)
255    return 0;
256  ObjectFile *Res = 0;
257  if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
258    ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find(
259        std::make_pair(UB, ArchName));
260    if (I != ObjectFileForArch.end())
261      return I->second;
262    OwningPtr<ObjectFile> ParsedObj;
263    if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) {
264      Res = ParsedObj.take();
265      ParsedBinariesAndObjects.push_back(Res);
266    }
267    ObjectFileForArch[std::make_pair(UB, ArchName)] = Res;
268  } else if (Bin->isObject()) {
269    Res = cast<ObjectFile>(Bin);
270  }
271  return Res;
272}
273
274ModuleInfo *
275LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
276  ModuleMapTy::iterator I = Modules.find(ModuleName);
277  if (I != Modules.end())
278    return I->second;
279  std::string BinaryName = ModuleName;
280  std::string ArchName = Opts.DefaultArch;
281  size_t ColonPos = ModuleName.find_last_of(':');
282  // Verify that substring after colon form a valid arch name.
283  if (ColonPos != std::string::npos) {
284    std::string ArchStr = ModuleName.substr(ColonPos + 1);
285    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
286      BinaryName = ModuleName.substr(0, ColonPos);
287      ArchName = ArchStr;
288    }
289  }
290  BinaryPair Binaries = getOrCreateBinary(BinaryName);
291  ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName);
292  ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName);
293
294  if (Obj == 0) {
295    // Failed to find valid object file.
296    Modules.insert(make_pair(ModuleName, (ModuleInfo *)0));
297    return 0;
298  }
299  DIContext *Context = DIContext::getDWARFContext(DbgObj);
300  assert(Context);
301  ModuleInfo *Info = new ModuleInfo(Obj, Context);
302  Modules.insert(make_pair(ModuleName, Info));
303  return Info;
304}
305
306std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
307  // By default, DILineInfo contains "<invalid>" for function/filename it
308  // cannot fetch. We replace it to "??" to make our output closer to addr2line.
309  static const std::string kDILineInfoBadString = "<invalid>";
310  std::stringstream Result;
311  if (Opts.PrintFunctions) {
312    std::string FunctionName = LineInfo.getFunctionName();
313    if (FunctionName == kDILineInfoBadString)
314      FunctionName = kBadString;
315    else if (Opts.Demangle)
316      FunctionName = DemangleName(FunctionName);
317    Result << FunctionName << "\n";
318  }
319  std::string Filename = LineInfo.getFileName();
320  if (Filename == kDILineInfoBadString)
321    Filename = kBadString;
322  Result << Filename << ":" << LineInfo.getLine() << ":" << LineInfo.getColumn()
323         << "\n";
324  return Result.str();
325}
326
327#if !defined(_MSC_VER)
328// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
329extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
330                                size_t *length, int *status);
331#endif
332
333std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
334#if !defined(_MSC_VER)
335  int status = 0;
336  char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status);
337  if (status != 0)
338    return Name;
339  std::string Result = DemangledName;
340  free(DemangledName);
341  return Result;
342#else
343  return Name;
344#endif
345}
346
347} // namespace symbolize
348} // namespace llvm
349