1//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the Enhanced Disassembly library's disassembler class. 11// The disassembler is responsible for vending individual instructions according 12// to a given architecture and disassembly syntax. 13// 14//===----------------------------------------------------------------------===// 15 16#include "EDDisassembler.h" 17#include "EDInst.h" 18#include "llvm/MC/EDInstInfo.h" 19#include "llvm/MC/MCAsmInfo.h" 20#include "llvm/MC/MCContext.h" 21#include "llvm/MC/MCDisassembler.h" 22#include "llvm/MC/MCExpr.h" 23#include "llvm/MC/MCInst.h" 24#include "llvm/MC/MCInstPrinter.h" 25#include "llvm/MC/MCInstrInfo.h" 26#include "llvm/MC/MCRegisterInfo.h" 27#include "llvm/MC/MCStreamer.h" 28#include "llvm/MC/MCSubtargetInfo.h" 29#include "llvm/MC/MCParser/AsmLexer.h" 30#include "llvm/MC/MCParser/MCAsmParser.h" 31#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 32#include "llvm/MC/MCTargetAsmLexer.h" 33#include "llvm/MC/MCTargetAsmParser.h" 34#include "llvm/Support/MemoryBuffer.h" 35#include "llvm/Support/MemoryObject.h" 36#include "llvm/Support/SourceMgr.h" 37#include "llvm/Support/TargetRegistry.h" 38using namespace llvm; 39 40EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; 41 42struct TripleMap { 43 Triple::ArchType Arch; 44 const char *String; 45}; 46 47static const struct TripleMap triplemap[] = { 48 { Triple::x86, "i386-unknown-unknown" }, 49 { Triple::x86_64, "x86_64-unknown-unknown" }, 50 { Triple::arm, "arm-unknown-unknown" }, 51 { Triple::thumb, "thumb-unknown-unknown" } 52}; 53 54/// infoFromArch - Returns the TripleMap corresponding to a given architecture, 55/// or NULL if there is an error 56/// 57/// @arg arch - The Triple::ArchType for the desired architecture 58static const char *tripleFromArch(Triple::ArchType arch) { 59 unsigned int infoIndex; 60 61 for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { 62 if (arch == triplemap[infoIndex].Arch) 63 return triplemap[infoIndex].String; 64 } 65 66 return NULL; 67} 68 69/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer 70/// for the desired assembly syntax, suitable for passing to 71/// Target::createMCInstPrinter() 72/// 73/// @arg arch - The target architecture 74/// @arg syntax - The assembly syntax in sd form 75static int getLLVMSyntaxVariant(Triple::ArchType arch, 76 EDDisassembler::AssemblySyntax syntax) { 77 switch (syntax) { 78 // Mappings below from X86AsmPrinter.cpp 79 case EDDisassembler::kEDAssemblySyntaxX86ATT: 80 if (arch == Triple::x86 || arch == Triple::x86_64) 81 return 0; 82 break; 83 case EDDisassembler::kEDAssemblySyntaxX86Intel: 84 if (arch == Triple::x86 || arch == Triple::x86_64) 85 return 1; 86 break; 87 case EDDisassembler::kEDAssemblySyntaxARMUAL: 88 if (arch == Triple::arm || arch == Triple::thumb) 89 return 0; 90 break; 91 } 92 93 return -1; 94} 95 96EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, 97 AssemblySyntax syntax) { 98 const char *triple = tripleFromArch(arch); 99 return getDisassembler(StringRef(triple), syntax); 100} 101 102EDDisassembler *EDDisassembler::getDisassembler(StringRef str, 103 AssemblySyntax syntax) { 104 CPUKey key; 105 key.Triple = str.str(); 106 key.Syntax = syntax; 107 108 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); 109 110 if (i != sDisassemblers.end()) { 111 return i->second; 112 } 113 114 EDDisassembler *sdd = new EDDisassembler(key); 115 if (!sdd->valid()) { 116 delete sdd; 117 return NULL; 118 } 119 120 sDisassemblers[key] = sdd; 121 122 return sdd; 123} 124 125EDDisassembler::EDDisassembler(CPUKey &key) : 126 Valid(false), 127 HasSemantics(false), 128 ErrorStream(nulls()), 129 Key(key), 130 TgtTriple(key.Triple.c_str()) { 131 132 LLVMSyntaxVariant = getLLVMSyntaxVariant(TgtTriple.getArch(), key.Syntax); 133 134 if (LLVMSyntaxVariant < 0) 135 return; 136 137 std::string tripleString(key.Triple); 138 std::string errorString; 139 140 Tgt = TargetRegistry::lookupTarget(key.Triple, 141 errorString); 142 143 if (!Tgt) 144 return; 145 146 MRI.reset(Tgt->createMCRegInfo(tripleString)); 147 148 if (!MRI) 149 return; 150 151 initMaps(*MRI); 152 153 AsmInfo.reset(Tgt->createMCAsmInfo(tripleString)); 154 155 if (!AsmInfo) 156 return; 157 158 STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", "")); 159 160 if (!STI) 161 return; 162 163 Disassembler.reset(Tgt->createMCDisassembler(*STI)); 164 165 if (!Disassembler) 166 return; 167 168 InstInfos = Disassembler->getEDInfo(); 169 170 MII.reset(Tgt->createMCInstrInfo()); 171 172 if (!MII) 173 return; 174 175 InstString.reset(new std::string); 176 InstStream.reset(new raw_string_ostream(*InstString)); 177 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, 178 *MII, *MRI, *STI)); 179 180 if (!InstPrinter) 181 return; 182 183 GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); 184 SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo)); 185 SpecificAsmLexer->InstallLexer(*GenericAsmLexer); 186 187 initMaps(*MRI); 188 189 Valid = true; 190} 191 192EDDisassembler::~EDDisassembler() { 193 if (!valid()) 194 return; 195} 196 197namespace { 198 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback 199 /// as provided by the sd interface. See MemoryObject. 200 class EDMemoryObject : public llvm::MemoryObject { 201 private: 202 EDByteReaderCallback Callback; 203 void *Arg; 204 public: 205 EDMemoryObject(EDByteReaderCallback callback, 206 void *arg) : Callback(callback), Arg(arg) { } 207 ~EDMemoryObject() { } 208 uint64_t getBase() const { return 0x0; } 209 uint64_t getExtent() const { return (uint64_t)-1; } 210 int readByte(uint64_t address, uint8_t *ptr) const { 211 if (!Callback) 212 return -1; 213 214 if (Callback(ptr, address, Arg)) 215 return -1; 216 217 return 0; 218 } 219 }; 220} 221 222EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, 223 uint64_t address, 224 void *arg) { 225 EDMemoryObject memoryObject(byteReader, arg); 226 227 MCInst* inst = new MCInst; 228 uint64_t byteSize; 229 230 MCDisassembler::DecodeStatus S; 231 S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address, 232 ErrorStream, nulls()); 233 switch (S) { 234 case MCDisassembler::Fail: 235 case MCDisassembler::SoftFail: 236 // FIXME: Do something different on soft failure mode? 237 delete inst; 238 return NULL; 239 240 case MCDisassembler::Success: { 241 const llvm::EDInstInfo *thisInstInfo = NULL; 242 243 if (InstInfos) { 244 thisInstInfo = &InstInfos[inst->getOpcode()]; 245 } 246 247 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); 248 return sdInst; 249 } 250 } 251 return NULL; 252} 253 254void EDDisassembler::initMaps(const MCRegisterInfo ®isterInfo) { 255 unsigned numRegisters = registerInfo.getNumRegs(); 256 unsigned registerIndex; 257 258 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { 259 const char* registerName = registerInfo.getName(registerIndex); 260 261 RegVec.push_back(registerName); 262 RegRMap[registerName] = registerIndex; 263 } 264 265 switch (TgtTriple.getArch()) { 266 default: 267 break; 268 case Triple::x86: 269 case Triple::x86_64: 270 stackPointers.insert(registerIDWithName("SP")); 271 stackPointers.insert(registerIDWithName("ESP")); 272 stackPointers.insert(registerIDWithName("RSP")); 273 274 programCounters.insert(registerIDWithName("IP")); 275 programCounters.insert(registerIDWithName("EIP")); 276 programCounters.insert(registerIDWithName("RIP")); 277 break; 278 case Triple::arm: 279 case Triple::thumb: 280 stackPointers.insert(registerIDWithName("SP")); 281 282 programCounters.insert(registerIDWithName("PC")); 283 break; 284 } 285} 286 287const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { 288 if (registerID >= RegVec.size()) 289 return NULL; 290 else 291 return RegVec[registerID].c_str(); 292} 293 294unsigned EDDisassembler::registerIDWithName(const char *name) const { 295 regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); 296 if (iter == RegRMap.end()) 297 return 0; 298 else 299 return (*iter).second; 300} 301 302bool EDDisassembler::registerIsStackPointer(unsigned registerID) { 303 return (stackPointers.find(registerID) != stackPointers.end()); 304} 305 306bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { 307 return (programCounters.find(registerID) != programCounters.end()); 308} 309 310int EDDisassembler::printInst(std::string &str, MCInst &inst) { 311 PrinterMutex.acquire(); 312 313 InstPrinter->printInst(&inst, *InstStream, ""); 314 InstStream->flush(); 315 str = *InstString; 316 InstString->clear(); 317 318 PrinterMutex.release(); 319 320 return 0; 321} 322 323static void diag_handler(const SMDiagnostic &diag, void *context) { 324 if (context) 325 diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream); 326} 327 328int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, 329 SmallVectorImpl<AsmToken> &tokens, 330 const std::string &str) { 331 int ret = 0; 332 333 switch (TgtTriple.getArch()) { 334 default: 335 return -1; 336 case Triple::x86: 337 case Triple::x86_64: 338 case Triple::arm: 339 case Triple::thumb: 340 break; 341 } 342 343 const char *cStr = str.c_str(); 344 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); 345 346 StringRef instName; 347 SMLoc instLoc; 348 349 SourceMgr sourceMgr; 350 sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this)); 351 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over 352 MCContext context(*AsmInfo, *MRI, NULL); 353 OwningPtr<MCStreamer> streamer(createNullStreamer(context)); 354 OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr, 355 context, *streamer, 356 *AsmInfo)); 357 358 OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(Key.Triple.c_str(), "", "")); 359 OwningPtr<MCTargetAsmParser> 360 TargetParser(Tgt->createMCAsmParser(*STI, *genericParser)); 361 362 AsmToken OpcodeToken = genericParser->Lex(); 363 AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to 364 365 if (OpcodeToken.is(AsmToken::Identifier)) { 366 instName = OpcodeToken.getString(); 367 instLoc = OpcodeToken.getLoc(); 368 369 if (NextToken.isNot(AsmToken::Eof) && 370 TargetParser->ParseInstruction(instName, instLoc, operands)) 371 ret = -1; 372 } else { 373 ret = -1; 374 } 375 376 ParserMutex.acquire(); 377 378 if (!ret) { 379 GenericAsmLexer->setBuffer(buf); 380 381 while (SpecificAsmLexer->Lex(), 382 SpecificAsmLexer->isNot(AsmToken::Eof) && 383 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { 384 if (SpecificAsmLexer->is(AsmToken::Error)) { 385 ret = -1; 386 break; 387 } 388 tokens.push_back(SpecificAsmLexer->getTok()); 389 } 390 } 391 392 ParserMutex.release(); 393 394 return ret; 395} 396 397int EDDisassembler::llvmSyntaxVariant() const { 398 return LLVMSyntaxVariant; 399} 400