1//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
11#include "llvm/MC/MCContext.h"
12#include "llvm/MC/MCExpr.h"
13#include "llvm/MC/MCInst.h"
14#include "llvm/Support/raw_ostream.h"
15#include <cstring>
16
17using namespace llvm;
18
19namespace llvm {
20class Triple;
21}
22
23// This function tries to add a symbolic operand in place of the immediate
24// Value in the MCInst. The immediate Value has had any PC adjustment made by
25// the caller. If the instruction is a branch instruction then IsBranch is true,
26// else false. If the getOpInfo() function was set as part of the
27// setupForSymbolicDisassembly() call then that function is called to get any
28// symbolic information at the Address for this instruction. If that returns
29// non-zero then the symbolic information it returns is used to create an MCExpr
30// and that is added as an operand to the MCInst. If getOpInfo() returns zero
31// and IsBranch is true then a symbol look up for Value is done and if a symbol
32// is found an MCExpr is created with that, else an MCExpr with Value is
33// created. This function returns true if it adds an operand to the MCInst and
34// false otherwise.
35bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
36                                                    raw_ostream &cStream,
37                                                    int64_t Value,
38                                                    uint64_t Address,
39                                                    bool IsBranch,
40                                                    uint64_t Offset,
41                                                    uint64_t InstSize) {
42  struct LLVMOpInfo1 SymbolicOp;
43  std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
44  SymbolicOp.Value = Value;
45
46  if (!GetOpInfo ||
47      !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
48    // Clear SymbolicOp.Value from above and also all other fields.
49    std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
50
51    // At this point, GetOpInfo() did not find any relocation information about
52    // this operand and we are left to use the SymbolLookUp() call back to guess
53    // if the Value is the address of a symbol.  In the case this is a branch
54    // that always makes sense to guess.  But in the case of an immediate it is
55    // a bit more questionable if it is an address of a symbol or some other
56    // reference.  So if the immediate Value comes from a width of 1 byte,
57    // InstSize, we will not guess it is an address of a symbol.  Because in
58    // object files assembled starting at address 0 this usually leads to
59    // incorrect symbolication.
60    if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
61      return false;
62
63    uint64_t ReferenceType;
64    if (IsBranch)
65       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
66    else
67       ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
68    const char *ReferenceName;
69    const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
70                                    &ReferenceName);
71    if (Name) {
72      SymbolicOp.AddSymbol.Name = Name;
73      SymbolicOp.AddSymbol.Present = true;
74      // If Name is a C++ symbol name put the human readable name in a comment.
75      if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)
76        cStream << ReferenceName;
77    }
78    // For branches always create an MCExpr so it gets printed as hex address.
79    else if (IsBranch) {
80      SymbolicOp.Value = Value;
81    }
82    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
83      cStream << "symbol stub for: " << ReferenceName;
84    else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
85      cStream << "Objc message: " << ReferenceName;
86    if (!Name && !IsBranch)
87      return false;
88  }
89
90  const MCExpr *Add = nullptr;
91  if (SymbolicOp.AddSymbol.Present) {
92    if (SymbolicOp.AddSymbol.Name) {
93      StringRef Name(SymbolicOp.AddSymbol.Name);
94      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
95      Add = MCSymbolRefExpr::create(Sym, Ctx);
96    } else {
97      Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx);
98    }
99  }
100
101  const MCExpr *Sub = nullptr;
102  if (SymbolicOp.SubtractSymbol.Present) {
103      if (SymbolicOp.SubtractSymbol.Name) {
104      StringRef Name(SymbolicOp.SubtractSymbol.Name);
105      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
106      Sub = MCSymbolRefExpr::create(Sym, Ctx);
107    } else {
108      Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
109    }
110  }
111
112  const MCExpr *Off = nullptr;
113  if (SymbolicOp.Value != 0)
114    Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
115
116  const MCExpr *Expr;
117  if (Sub) {
118    const MCExpr *LHS;
119    if (Add)
120      LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
121    else
122      LHS = MCUnaryExpr::createMinus(Sub, Ctx);
123    if (Off)
124      Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
125    else
126      Expr = LHS;
127  } else if (Add) {
128    if (Off)
129      Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
130    else
131      Expr = Add;
132  } else {
133    if (Off)
134      Expr = Off;
135    else
136      Expr = MCConstantExpr::create(0, Ctx);
137  }
138
139  Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);
140  if (!Expr)
141    return false;
142
143  MI.addOperand(MCOperand::createExpr(Expr));
144  return true;
145}
146
147// This function tries to add a comment as to what is being referenced by a load
148// instruction with the base register that is the Pc.  These can often be values
149// in a literal pool near the Address of the instruction. The Address of the
150// instruction and its immediate Value are used as a possible literal pool entry.
151// The SymbolLookUp call back will return the name of a symbol referenced by the
152// literal pool's entry if the referenced address is that of a symbol. Or it
153// will return a pointer to a literal 'C' string if the referenced address of
154// the literal pool's entry is an address into a section with C string literals.
155// Or if the reference is to an Objective-C data structure it will return a
156// specific reference type for it and a string.
157void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
158                                                           int64_t Value,
159                                                           uint64_t Address) {
160  if (SymbolLookUp) {
161    uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
162    const char *ReferenceName;
163    (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
164    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
165      cStream << "literal pool symbol address: " << ReferenceName;
166    else if(ReferenceType ==
167            LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
168      cStream << "literal pool for: \"";
169      cStream.write_escaped(ReferenceName);
170      cStream << "\"";
171    }
172    else if(ReferenceType ==
173            LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
174      cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
175    else if(ReferenceType ==
176            LLVMDisassembler_ReferenceType_Out_Objc_Message)
177      cStream << "Objc message: " << ReferenceName;
178    else if(ReferenceType ==
179            LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
180      cStream << "Objc message ref: " << ReferenceName;
181    else if(ReferenceType ==
182            LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
183      cStream << "Objc selector ref: " << ReferenceName;
184    else if(ReferenceType ==
185            LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
186      cStream << "Objc class ref: " << ReferenceName;
187  }
188}
189
190namespace llvm {
191MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
192                                 LLVMSymbolLookupCallback SymbolLookUp,
193                                 void *DisInfo, MCContext *Ctx,
194                                 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
195  assert(Ctx && "No MCContext given for symbolic disassembly");
196
197  return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
198                                  SymbolLookUp, DisInfo);
199}
200}
201