1//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "AArch64ExternalSymbolizer.h"
11#include "AArch64Subtarget.h"
12#include "MCTargetDesc/AArch64AddressingModes.h"
13#include "Utils/AArch64BaseInfo.h"
14#include "llvm/MC/MCContext.h"
15#include "llvm/MC/MCExpr.h"
16#include "llvm/MC/MCInst.h"
17#include "llvm/Support/Format.h"
18#include "llvm/Support/raw_ostream.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64-disassembler"
23
24static MCSymbolRefExpr::VariantKind
25getVariant(uint64_t LLVMDisassembler_VariantKind) {
26  switch (LLVMDisassembler_VariantKind) {
27  case LLVMDisassembler_VariantKind_None:
28    return MCSymbolRefExpr::VK_None;
29  case LLVMDisassembler_VariantKind_ARM64_PAGE:
30    return MCSymbolRefExpr::VK_PAGE;
31  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
32    return MCSymbolRefExpr::VK_PAGEOFF;
33  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
34    return MCSymbolRefExpr::VK_GOTPAGE;
35  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
36    return MCSymbolRefExpr::VK_GOTPAGEOFF;
37  case LLVMDisassembler_VariantKind_ARM64_TLVP:
38  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
39  default:
40    llvm_unreachable("bad LLVMDisassembler_VariantKind");
41  }
42}
43
44/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
45/// operand in place of the immediate Value in the MCInst.  The immediate
46/// Value has not had any PC adjustment made by the caller. If the instruction
47/// is a branch that adds the PC to the immediate Value then isBranch is
48/// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
49/// symbolic information at the Address for this instrution.  If that returns
50/// non-zero then the symbolic information it returns is used to create an
51/// MCExpr and that is added as an operand to the MCInst.  If GetOpInfo()
52/// returns zero and isBranch is Success then a symbol look up for
53/// Address + Value is done and if a symbol is found an MCExpr is created with
54/// that, else an MCExpr with Address + Value is created.  If GetOpInfo()
55/// returns zero and isBranch is Fail then the Opcode of the MCInst is
56/// tested and for ADRP an other instructions that help to load of pointers
57/// a symbol look up is done to see it is returns a specific reference type
58/// to add to the comment stream.  This function returns Success if it adds
59/// an operand to the MCInst and Fail otherwise.
60bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
61    MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
62    bool IsBranch, uint64_t Offset, uint64_t InstSize) {
63  // FIXME: This method shares a lot of code with
64  //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
65  //        refactor the MCExternalSymbolizer interface to allow more of this
66  //        implementation to be shared.
67  //
68  struct LLVMOpInfo1 SymbolicOp;
69  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
70  SymbolicOp.Value = Value;
71  uint64_t ReferenceType;
72  const char *ReferenceName;
73  if (!GetOpInfo ||
74      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
75    if (IsBranch) {
76      ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
77      const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
78                                      Address, &ReferenceName);
79      if (Name) {
80        SymbolicOp.AddSymbol.Name = Name;
81        SymbolicOp.AddSymbol.Present = true;
82        SymbolicOp.Value = 0;
83      } else {
84        SymbolicOp.Value = Address + Value;
85      }
86      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
87        CommentStream << "symbol stub for: " << ReferenceName;
88      else if (ReferenceType ==
89               LLVMDisassembler_ReferenceType_Out_Objc_Message)
90        CommentStream << "Objc message: " << ReferenceName;
91    } else if (MI.getOpcode() == AArch64::ADRP) {
92        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
93        // otool expects the fully encoded ADRP instruction to be passed in as
94        // the value here, so reconstruct it:
95        const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
96        uint32_t EncodedInst = 0x90000000;
97        EncodedInst |= (Value & 0x3) << 29; // immlo
98        EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
99        EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
100        SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
101                     &ReferenceName);
102        CommentStream << format("0x%llx",
103                                0xfffffffffffff000LL & (Address + Value));
104    } else if (MI.getOpcode() == AArch64::ADDXri ||
105               MI.getOpcode() == AArch64::LDRXui ||
106               MI.getOpcode() == AArch64::LDRXl ||
107               MI.getOpcode() == AArch64::ADR) {
108      if (MI.getOpcode() == AArch64::ADDXri)
109        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
110      else if (MI.getOpcode() == AArch64::LDRXui)
111        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
112      if (MI.getOpcode() == AArch64::LDRXl) {
113        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
114        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
115                     &ReferenceName);
116      } else if (MI.getOpcode() == AArch64::ADR) {
117        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
118        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
119                            &ReferenceName);
120      } else {
121        const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
122        // otool expects the fully encoded ADD/LDR instruction to be passed in
123        // as the value here, so reconstruct it:
124        unsigned EncodedInst =
125          MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
126        EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
127        EncodedInst |=
128          MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
129        EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
130
131        SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
132                     &ReferenceName);
133      }
134      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
135        CommentStream << "literal pool symbol address: " << ReferenceName;
136      else if (ReferenceType ==
137               LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
138        CommentStream << "literal pool for: \"";
139        CommentStream.write_escaped(ReferenceName);
140        CommentStream << "\"";
141      } else if (ReferenceType ==
142               LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
143        CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
144      else if (ReferenceType ==
145               LLVMDisassembler_ReferenceType_Out_Objc_Message)
146        CommentStream << "Objc message: " << ReferenceName;
147      else if (ReferenceType ==
148               LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
149        CommentStream << "Objc message ref: " << ReferenceName;
150      else if (ReferenceType ==
151               LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
152        CommentStream << "Objc selector ref: " << ReferenceName;
153      else if (ReferenceType ==
154               LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
155        CommentStream << "Objc class ref: " << ReferenceName;
156      // For these instructions, the SymbolLookUp() above is just to get the
157      // ReferenceType and ReferenceName.  We want to make sure not to
158      // fall through so we don't build an MCExpr to leave the disassembly
159      // of the immediate values of these instructions to the InstPrinter.
160      return false;
161    } else {
162      return false;
163    }
164  }
165
166  const MCExpr *Add = nullptr;
167  if (SymbolicOp.AddSymbol.Present) {
168    if (SymbolicOp.AddSymbol.Name) {
169      StringRef Name(SymbolicOp.AddSymbol.Name);
170      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
171      MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
172      if (Variant != MCSymbolRefExpr::VK_None)
173        Add = MCSymbolRefExpr::create(Sym, Variant, Ctx);
174      else
175        Add = MCSymbolRefExpr::create(Sym, Ctx);
176    } else {
177      Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx);
178    }
179  }
180
181  const MCExpr *Sub = nullptr;
182  if (SymbolicOp.SubtractSymbol.Present) {
183    if (SymbolicOp.SubtractSymbol.Name) {
184      StringRef Name(SymbolicOp.SubtractSymbol.Name);
185      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
186      Sub = MCSymbolRefExpr::create(Sym, Ctx);
187    } else {
188      Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx);
189    }
190  }
191
192  const MCExpr *Off = nullptr;
193  if (SymbolicOp.Value != 0)
194    Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
195
196  const MCExpr *Expr;
197  if (Sub) {
198    const MCExpr *LHS;
199    if (Add)
200      LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
201    else
202      LHS = MCUnaryExpr::createMinus(Sub, Ctx);
203    if (Off)
204      Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
205    else
206      Expr = LHS;
207  } else if (Add) {
208    if (Off)
209      Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
210    else
211      Expr = Add;
212  } else {
213    if (Off)
214      Expr = Off;
215    else
216      Expr = MCConstantExpr::create(0, Ctx);
217  }
218
219  MI.addOperand(MCOperand::createExpr(Expr));
220
221  return true;
222}
223