//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the disassembler of strings of bytes written in
// hexadecimal, from standard input or from a file.
//
//===----------------------------------------------------------------------===//
14ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan
15a3dcfb130044f306632a5fab43854eda4095a09cChris Lattner#include "Disassembler.h"
16f010c464a11444733ec67e31aace8bcebeaf2588Chandler Carruth#include "llvm/ADT/Triple.h"
17dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/MC/MCAsmInfo.h"
18dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/MC/MCContext.h"
19de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/MC/MCDisassembler/MCDisassembler.h"
20ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan#include "llvm/MC/MCInst.h"
21dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/MC/MCRegisterInfo.h"
22d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton#include "llvm/MC/MCStreamer.h"
23b950585cc5a0d665e9accfe5ce490cd269756f2eJames Molloy#include "llvm/MC/MCSubtargetInfo.h"
24ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan#include "llvm/Support/MemoryBuffer.h"
25c3de94fabf3858ad57373171fa6bda407f2224c9Chris Lattner#include "llvm/Support/SourceMgr.h"
263e74d6fdd248e20a280f1dff3da9a6c689c2c4c3Evan Cheng#include "llvm/Support/TargetRegistry.h"
273e74d6fdd248e20a280f1dff3da9a6c689c2c4c3Evan Cheng#include "llvm/Support/raw_ostream.h"
28d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton
29ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callananusing namespace llvm;
30ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan
/// A run of parsed input bytes together with their source positions.
/// `first` holds the byte values; `second` holds, at the same index, a
/// pointer to the input text the byte was parsed from (used to build
/// diagnostic locations).
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
33ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan
34c6ab1901f922c854c84e9e055cbc83601df80addDaniel Dunbarstatic bool PrintInsts(const MCDisassembler &DisAsm,
35d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton                       const ByteArrayTy &Bytes,
36d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton                       SourceMgr &SM, raw_ostream &Out,
3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                       MCStreamer &Streamer, bool InAtomicBlock,
3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                       const MCSubtargetInfo &STI) {
3937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());
40f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
412e235a826d2f65a064b2a39b27c775d0adf8b7c3Sean Callanan  // Disassemble it to strings.
42665e947740bb1909f9c3dc60927e8b9620d644e5Chris Lattner  uint64_t Size;
432e235a826d2f65a064b2a39b27c775d0adf8b7c3Sean Callanan  uint64_t Index;
44f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Index = 0; Index < Bytes.first.size(); Index += Size) {
462e235a826d2f65a064b2a39b27c775d0adf8b7c3Sean Callanan    MCInst Inst;
47f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
4883e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson    MCDisassembler::DecodeStatus S;
4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index,
5098c5ddabca1debf935a07d14d0cbc9732374bdb8Owen Anderson                              /*REMOVE*/ nulls(), nulls());
5183e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson    switch (S) {
5283e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson    case MCDisassembler::Fail:
5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
543f2d5f60b31fd057c10f77b2e607b23a8c94f6d3Chris Lattner                      SourceMgr::DK_Warning,
553f2d5f60b31fd057c10f77b2e607b23a8c94f6d3Chris Lattner                      "invalid instruction encoding");
5638c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      // Don't try to resynchronise the stream in a block
5738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      if (InAtomicBlock)
5838c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover        return true;
5938c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
602e235a826d2f65a064b2a39b27c775d0adf8b7c3Sean Callanan      if (Size == 0)
612e235a826d2f65a064b2a39b27c775d0adf8b7c3Sean Callanan        Size = 1; // skip illegible bytes
6238c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
6383e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson      break;
6483e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson
6583e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson    case MCDisassembler::SoftFail:
6637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
673f2d5f60b31fd057c10f77b2e607b23a8c94f6d3Chris Lattner                      SourceMgr::DK_Warning,
683f2d5f60b31fd057c10f77b2e607b23a8c94f6d3Chris Lattner                      "potentially undefined instruction encoding");
6983e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson      // Fall through
7083e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson
7183e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson    case MCDisassembler::Success:
7236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Streamer.EmitInstruction(Inst, STI);
7383e3f67fb68d497b600da83a62f000fcce7868a9Owen Anderson      break;
742e235a826d2f65a064b2a39b27c775d0adf8b7c3Sean Callanan    }
75ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan  }
76f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
77665e947740bb1909f9c3dc60927e8b9620d644e5Chris Lattner  return false;
78ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan}
79ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan
8038c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northoverstatic bool SkipToToken(StringRef &Str) {
8137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (;;) {
8237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (Str.empty())
8337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return false;
8437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
8538c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    // Strip horizontal whitespace and commas.
8637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {
872adbef06a637b367f724d0a46f7fa78d5827ec64Chris Lattner      Str = Str.substr(Pos);
8837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
892adbef06a637b367f724d0a46f7fa78d5827ec64Chris Lattner    }
90f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
9137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // If this is the start of a comment, remove the rest of the line.
9237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (Str[0] == '#') {
932adbef06a637b367f724d0a46f7fa78d5827ec64Chris Lattner        Str = Str.substr(Str.find_first_of('\n'));
942adbef06a637b367f724d0a46f7fa78d5827ec64Chris Lattner      continue;
95ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan    }
9637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return true;
9738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  }
9838c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover}
9938c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
10038c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
10138c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northoverstatic bool ByteArrayFromString(ByteArrayTy &ByteArray,
10238c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover                                StringRef &Str,
10338c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover                                SourceMgr &SM) {
10438c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  while (SkipToToken(Str)) {
10538c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    // Handled by higher level
10638c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    if (Str[0] == '[' || Str[0] == ']')
10738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      return false;
108f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
109ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan    // Get the current token.
11038c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    size_t Next = Str.find_first_of(" \t\n\r,#[]");
1112adbef06a637b367f724d0a46f7fa78d5827ec64Chris Lattner    StringRef Value = Str.substr(0, Next);
112f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
113ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan    // Convert to a byte and add to the byte vector.
114222af464822c9c47b2859e813912ed6ba5339217Chris Lattner    unsigned ByteVal;
115222af464822c9c47b2859e813912ed6ba5339217Chris Lattner    if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
116c3de94fabf3858ad57373171fa6bda407f2224c9Chris Lattner      // If we have an error, print it and skip to the end of line.
1173f2d5f60b31fd057c10f77b2e607b23a8c94f6d3Chris Lattner      SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
1183f2d5f60b31fd057c10f77b2e607b23a8c94f6d3Chris Lattner                      "invalid input token");
119c3de94fabf3858ad57373171fa6bda407f2224c9Chris Lattner      Str = Str.substr(Str.find('\n'));
12037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ByteArray.first.clear();
12137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ByteArray.second.clear();
122c3de94fabf3858ad57373171fa6bda407f2224c9Chris Lattner      continue;
123ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan    }
124f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
12537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ByteArray.first.push_back(ByteVal);
12637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ByteArray.second.push_back(Value.data());
127222af464822c9c47b2859e813912ed6ba5339217Chris Lattner    Str = Str.substr(Next);
128ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan  }
129f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
130668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  return false;
131668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan}
132668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan
133b262799d49891b036daa00eddf51947487346c98Evan Chengint Disassembler::disassemble(const Target &T,
134a5c177e70a42f48e4885075c4c48aad0816a2817Bill Wendling                              const std::string &Triple,
135d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton                              MCSubtargetInfo &STI,
136d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton                              MCStreamer &Streamer,
137d5826a33a5a7c298a8934541d11cda042028be3bDan Gohman                              MemoryBuffer &Buffer,
138d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton                              SourceMgr &SM,
139d5826a33a5a7c298a8934541d11cda042028be3bDan Gohman                              raw_ostream &Out) {
140dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!MRI) {
143dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    errs() << "error: no register info for target " << Triple << "\n";
144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return -1;
145dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
146dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
147dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  std::unique_ptr<const MCAsmInfo> MAI(T.createMCAsmInfo(*MRI, Triple));
148dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!MAI) {
149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    errs() << "error: no assembly info for target " << Triple << "\n";
150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return -1;
151dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
153dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Set up the MCContext for creating symbols and MCExpr's.
154dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  MCContext Ctx(MAI.get(), MRI.get(), nullptr);
155dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
156dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  std::unique_ptr<const MCDisassembler> DisAsm(
157dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    T.createMCDisassembler(STI, Ctx));
158668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  if (!DisAsm) {
159668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan    errs() << "error: no disassembler for target " << Triple << "\n";
160668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan    return -1;
161668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  }
162f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
163d0c478d95f440b4db76279fe47d6cf734a28fa9aRichard Barton  // Set up initial section manually here
16437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Streamer.InitSections(false);
165f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
166668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  bool ErrorOccurred = false;
167f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
168668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  // Convert the input to a vector for disassembly.
169668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  ByteArrayTy ByteArray;
170668b15467be158a5f0e0c4a5e1ec232da021892cSean Callanan  StringRef Str = Buffer.getBuffer();
17138c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  bool InAtomicBlock = false;
17238c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
17338c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  while (SkipToToken(Str)) {
17437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ByteArray.first.clear();
17537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ByteArray.second.clear();
17638c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
17738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    if (Str[0] == '[') {
17838c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      if (InAtomicBlock) {
17938c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover        SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
18038c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover                        "nested atomic blocks make no sense");
18138c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover        ErrorOccurred = true;
18238c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      }
18338c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      InAtomicBlock = true;
18438c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      Str = Str.drop_front();
18538c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      continue;
18638c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    } else if (Str[0] == ']') {
18738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      if (!InAtomicBlock) {
18838c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover        SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
18938c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover                        "attempt to close atomic block without opening");
19038c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover        ErrorOccurred = true;
19138c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      }
19238c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      InAtomicBlock = false;
19338c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      Str = Str.drop_front();
19438c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      continue;
19538c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    }
196f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
19738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    // It's a real token, get the bytes and emit them
19838c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
199f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
20037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!ByteArray.first.empty())
20138c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover      ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer,
20236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                  InAtomicBlock, STI);
20338c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  }
20438c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover
20538c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  if (InAtomicBlock) {
20638c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
20738c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover                    "unclosed atomic block");
20838c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover    ErrorOccurred = true;
20938c6ff6c111fcc53debb9e2880f89e2dd0676217Tim Northover  }
210f5bf3cf7e2a0ff1ca884a83a8b56b5a57f8a5c80Jim Grosbach
211665e947740bb1909f9c3dc60927e8b9620d644e5Chris Lattner  return ErrorOccurred;
212ba847da571354e13f1caa3699ee06b2d57df9fe9Sean Callanan}
213