1//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 11#include "llvm/MC/MCContext.h" 12#include "llvm/MC/MCExpr.h" 13#include "llvm/MC/MCInst.h" 14#include "llvm/Support/raw_ostream.h" 15#include <cstring> 16 17using namespace llvm; 18 19namespace llvm { 20class Triple; 21} 22 23// This function tries to add a symbolic operand in place of the immediate 24// Value in the MCInst. The immediate Value has had any PC adjustment made by 25// the caller. If the instruction is a branch instruction then IsBranch is true, 26// else false. If the getOpInfo() function was set as part of the 27// setupForSymbolicDisassembly() call then that function is called to get any 28// symbolic information at the Address for this instruction. If that returns 29// non-zero then the symbolic information it returns is used to create an MCExpr 30// and that is added as an operand to the MCInst. If getOpInfo() returns zero 31// and IsBranch is true then a symbol look up for Value is done and if a symbol 32// is found an MCExpr is created with that, else an MCExpr with Value is 33// created. This function returns true if it adds an operand to the MCInst and 34// false otherwise. 35bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, 36 raw_ostream &cStream, 37 int64_t Value, 38 uint64_t Address, 39 bool IsBranch, 40 uint64_t Offset, 41 uint64_t InstSize) { 42 struct LLVMOpInfo1 SymbolicOp; 43 std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 44 SymbolicOp.Value = Value; 45 46 if (!GetOpInfo || 47 !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { 48 // Clear SymbolicOp.Value from above and also all other fields. 49 std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 50 51 // At this point, GetOpInfo() did not find any relocation information about 52 // this operand and we are left to use the SymbolLookUp() call back to guess 53 // if the Value is the address of a symbol. In the case this is a branch 54 // that always makes sense to guess. But in the case of an immediate it is 55 // a bit more questionable if it is an address of a symbol or some other 56 // reference. So if the immediate Value comes from a width of 1 byte, 57 // InstSize, we will not guess it is an address of a symbol. Because in 58 // object files assembled starting at address 0 this usually leads to 59 // incorrect symbolication. 60 if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) 61 return false; 62 63 uint64_t ReferenceType; 64 if (IsBranch) 65 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 66 else 67 ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 68 const char *ReferenceName; 69 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, 70 &ReferenceName); 71 if (Name) { 72 SymbolicOp.AddSymbol.Name = Name; 73 SymbolicOp.AddSymbol.Present = true; 74 // If Name is a C++ symbol name put the human readable name in a comment. 75 if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name) 76 cStream << ReferenceName; 77 } 78 // For branches always create an MCExpr so it gets printed as hex address. 79 else if (IsBranch) { 80 SymbolicOp.Value = Value; 81 } 82 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 83 cStream << "symbol stub for: " << ReferenceName; 84 else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) 85 cStream << "Objc message: " << ReferenceName; 86 if (!Name && !IsBranch) 87 return false; 88 } 89 90 const MCExpr *Add = nullptr; 91 if (SymbolicOp.AddSymbol.Present) { 92 if (SymbolicOp.AddSymbol.Name) { 93 StringRef Name(SymbolicOp.AddSymbol.Name); 94 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 95 Add = MCSymbolRefExpr::create(Sym, Ctx); 96 } else { 97 Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx); 98 } 99 } 100 101 const MCExpr *Sub = nullptr; 102 if (SymbolicOp.SubtractSymbol.Present) { 103 if (SymbolicOp.SubtractSymbol.Name) { 104 StringRef Name(SymbolicOp.SubtractSymbol.Name); 105 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 106 Sub = MCSymbolRefExpr::create(Sym, Ctx); 107 } else { 108 Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx); 109 } 110 } 111 112 const MCExpr *Off = nullptr; 113 if (SymbolicOp.Value != 0) 114 Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); 115 116 const MCExpr *Expr; 117 if (Sub) { 118 const MCExpr *LHS; 119 if (Add) 120 LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); 121 else 122 LHS = MCUnaryExpr::createMinus(Sub, Ctx); 123 if (Off) 124 Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); 125 else 126 Expr = LHS; 127 } else if (Add) { 128 if (Off) 129 Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); 130 else 131 Expr = Add; 132 } else { 133 if (Off) 134 Expr = Off; 135 else 136 Expr = MCConstantExpr::create(0, Ctx); 137 } 138 139 Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); 140 if (!Expr) 141 return false; 142 143 MI.addOperand(MCOperand::createExpr(Expr)); 144 return true; 145} 146 147// This function tries to add a comment as to what is being referenced by a load 148// instruction with the base register that is the Pc. These can often be values 149// in a literal pool near the Address of the instruction. The Address of the 150// instruction and its immediate Value are used as a possible literal pool entry. 151// The SymbolLookUp call back will return the name of a symbol referenced by the 152// literal pool's entry if the referenced address is that of a symbol. Or it 153// will return a pointer to a literal 'C' string if the referenced address of 154// the literal pool's entry is an address into a section with C string literals. 155// Or if the reference is to an Objective-C data structure it will return a 156// specific reference type for it and a string. 157void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, 158 int64_t Value, 159 uint64_t Address) { 160 if (SymbolLookUp) { 161 uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; 162 const char *ReferenceName; 163 (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); 164 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) 165 cStream << "literal pool symbol address: " << ReferenceName; 166 else if(ReferenceType == 167 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { 168 cStream << "literal pool for: \""; 169 cStream.write_escaped(ReferenceName); 170 cStream << "\""; 171 } 172 else if(ReferenceType == 173 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) 174 cStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; 175 else if(ReferenceType == 176 LLVMDisassembler_ReferenceType_Out_Objc_Message) 177 cStream << "Objc message: " << ReferenceName; 178 else if(ReferenceType == 179 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) 180 cStream << "Objc message ref: " << ReferenceName; 181 else if(ReferenceType == 182 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) 183 cStream << "Objc selector ref: " << ReferenceName; 184 else if(ReferenceType == 185 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) 186 cStream << "Objc class ref: " << ReferenceName; 187 } 188} 189 190namespace llvm { 191MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, 192 LLVMSymbolLookupCallback SymbolLookUp, 193 void *DisInfo, MCContext *Ctx, 194 std::unique_ptr<MCRelocationInfo> &&RelInfo) { 195 assert(Ctx && "No MCContext given for symbolic disassembly"); 196 197 return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo, 198 SymbolLookUp, DisInfo); 199} 200} 201