1//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This class implements the disassembler of strings of bytes written in
11// hexadecimal, from standard input or from a file.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Disassembler.h"
16#include "llvm/ADT/Triple.h"
17#include "llvm/MC/MCAsmInfo.h"
18#include "llvm/MC/MCContext.h"
19#include "llvm/MC/MCDisassembler.h"
20#include "llvm/MC/MCInst.h"
21#include "llvm/MC/MCRegisterInfo.h"
22#include "llvm/MC/MCStreamer.h"
23#include "llvm/MC/MCSubtargetInfo.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/MemoryObject.h"
26#include "llvm/Support/SourceMgr.h"
27#include "llvm/Support/TargetRegistry.h"
28#include "llvm/Support/raw_ostream.h"
29
30using namespace llvm;
31
/// A sequence of parsed input bytes.  Each entry pairs the byte value with a
/// pointer to the source text it was parsed from, which is used to locate
/// diagnostics.
using ByteArrayTy = std::vector<std::pair<unsigned char, const char *>>;
33
34namespace {
35class VectorMemoryObject : public MemoryObject {
36private:
37  const ByteArrayTy &Bytes;
38public:
39  VectorMemoryObject(const ByteArrayTy &bytes) : Bytes(bytes) {}
40
41  uint64_t getBase() const override { return 0; }
42  uint64_t getExtent() const override { return Bytes.size(); }
43
44  int readByte(uint64_t Addr, uint8_t *Byte) const override {
45    if (Addr >= getExtent())
46      return -1;
47    *Byte = Bytes[Addr].first;
48    return 0;
49  }
50};
51}
52
53static bool PrintInsts(const MCDisassembler &DisAsm,
54                       const ByteArrayTy &Bytes,
55                       SourceMgr &SM, raw_ostream &Out,
56                       MCStreamer &Streamer, bool InAtomicBlock,
57                       const MCSubtargetInfo &STI) {
58  // Wrap the vector in a MemoryObject.
59  VectorMemoryObject memoryObject(Bytes);
60
61  // Disassemble it to strings.
62  uint64_t Size;
63  uint64_t Index;
64
65  for (Index = 0; Index < Bytes.size(); Index += Size) {
66    MCInst Inst;
67
68    MCDisassembler::DecodeStatus S;
69    S = DisAsm.getInstruction(Inst, Size, memoryObject, Index,
70                              /*REMOVE*/ nulls(), nulls());
71    switch (S) {
72    case MCDisassembler::Fail:
73      SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
74                      SourceMgr::DK_Warning,
75                      "invalid instruction encoding");
76      // Don't try to resynchronise the stream in a block
77      if (InAtomicBlock)
78        return true;
79
80      if (Size == 0)
81        Size = 1; // skip illegible bytes
82
83      break;
84
85    case MCDisassembler::SoftFail:
86      SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
87                      SourceMgr::DK_Warning,
88                      "potentially undefined instruction encoding");
89      // Fall through
90
91    case MCDisassembler::Success:
92      Streamer.EmitInstruction(Inst, STI);
93      break;
94    }
95  }
96
97  return false;
98}
99
100static bool SkipToToken(StringRef &Str) {
101  while (!Str.empty() && Str.find_first_not_of(" \t\r\n#,") != 0) {
102    // Strip horizontal whitespace and commas.
103    if (size_t Pos = Str.find_first_not_of(" \t\r,")) {
104      Str = Str.substr(Pos);
105    }
106
107    // If this is the end of a line or start of a comment, remove the rest of
108    // the line.
109    if (Str[0] == '\n' || Str[0] == '#') {
110      // Strip to the end of line if we already processed any bytes on this
111      // line.  This strips the comment and/or the \n.
112      if (Str[0] == '\n') {
113        Str = Str.substr(1);
114      } else {
115        Str = Str.substr(Str.find_first_of('\n'));
116        if (!Str.empty())
117          Str = Str.substr(1);
118      }
119      continue;
120    }
121  }
122
123  return !Str.empty();
124}
125
126
127static bool ByteArrayFromString(ByteArrayTy &ByteArray,
128                                StringRef &Str,
129                                SourceMgr &SM) {
130  while (SkipToToken(Str)) {
131    // Handled by higher level
132    if (Str[0] == '[' || Str[0] == ']')
133      return false;
134
135    // Get the current token.
136    size_t Next = Str.find_first_of(" \t\n\r,#[]");
137    StringRef Value = Str.substr(0, Next);
138
139    // Convert to a byte and add to the byte vector.
140    unsigned ByteVal;
141    if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
142      // If we have an error, print it and skip to the end of line.
143      SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
144                      "invalid input token");
145      Str = Str.substr(Str.find('\n'));
146      ByteArray.clear();
147      continue;
148    }
149
150    ByteArray.push_back(std::make_pair((unsigned char)ByteVal, Value.data()));
151    Str = Str.substr(Next);
152  }
153
154  return false;
155}
156
157int Disassembler::disassemble(const Target &T,
158                              const std::string &Triple,
159                              MCSubtargetInfo &STI,
160                              MCStreamer &Streamer,
161                              MemoryBuffer &Buffer,
162                              SourceMgr &SM,
163                              raw_ostream &Out) {
164
165  std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
166  if (!MRI) {
167    errs() << "error: no register info for target " << Triple << "\n";
168    return -1;
169  }
170
171  std::unique_ptr<const MCAsmInfo> MAI(T.createMCAsmInfo(*MRI, Triple));
172  if (!MAI) {
173    errs() << "error: no assembly info for target " << Triple << "\n";
174    return -1;
175  }
176
177  // Set up the MCContext for creating symbols and MCExpr's.
178  MCContext Ctx(MAI.get(), MRI.get(), nullptr);
179
180  std::unique_ptr<const MCDisassembler> DisAsm(
181    T.createMCDisassembler(STI, Ctx));
182  if (!DisAsm) {
183    errs() << "error: no disassembler for target " << Triple << "\n";
184    return -1;
185  }
186
187  // Set up initial section manually here
188  Streamer.InitSections();
189
190  bool ErrorOccurred = false;
191
192  // Convert the input to a vector for disassembly.
193  ByteArrayTy ByteArray;
194  StringRef Str = Buffer.getBuffer();
195  bool InAtomicBlock = false;
196
197  while (SkipToToken(Str)) {
198    ByteArray.clear();
199
200    if (Str[0] == '[') {
201      if (InAtomicBlock) {
202        SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
203                        "nested atomic blocks make no sense");
204        ErrorOccurred = true;
205      }
206      InAtomicBlock = true;
207      Str = Str.drop_front();
208      continue;
209    } else if (Str[0] == ']') {
210      if (!InAtomicBlock) {
211        SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
212                        "attempt to close atomic block without opening");
213        ErrorOccurred = true;
214      }
215      InAtomicBlock = false;
216      Str = Str.drop_front();
217      continue;
218    }
219
220    // It's a real token, get the bytes and emit them
221    ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
222
223    if (!ByteArray.empty())
224      ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer,
225                                  InAtomicBlock, STI);
226  }
227
228  if (InAtomicBlock) {
229    SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
230                    "unclosed atomic block");
231    ErrorOccurred = true;
232  }
233
234  return ErrorOccurred;
235}
236