1//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12
13#include "llvm/ADT/StringRef.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInstrInfo.h"
16#include "llvm/MC/MCParser/MCAsmLexer.h"
17#include "llvm/MC/MCParser/MCAsmParserExtension.h"
18#include "llvm/MC/MCTargetOptions.h"
19#include "llvm/Support/SMLoc.h"
20#include <cstdint>
21#include <memory>
22
23namespace llvm {
24
25class MCInst;
26class MCParsedAsmOperand;
27class MCStreamer;
28class MCSubtargetInfo;
29template <typename T> class SmallVectorImpl;
30
31using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
32
/// Kinds of rewrite that can be recorded against a piece of assembly text.
///
/// AsmRewritePrecedence below carries one entry per enumerator, listed in
/// declaration order, so any enumerator added here needs a matching entry
/// there. AOK_IntelExpr must remain the last enumerator: the static_assert
/// after the table uses it to derive the expected number of entries.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value associated with each AsmRewriteKind; entry N belongs to
/// the enumerator whose value is N.
/// NOTE(review): how consumers order rewrites by these values is not visible
/// in this header - confirm against the users of this table.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch, at compile time, an enumerator added to AsmRewriteKind without a
// corresponding precedence entry (or the reverse).
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  AOK_IntelExpr + 1,
              "AsmRewritePrecedence must have exactly one entry per "
              "AsmRewriteKind enumerator");
58
59// Represnt the various parts which makes up an intel expression,
60// used for emitting compound intel expressions
61struct IntelExpr {
62  bool NeedBracs;
63  int64_t Imm;
64  StringRef BaseReg;
65  StringRef IndexReg;
66  unsigned Scale;
67
68  IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
69    BaseReg(StringRef()), IndexReg(StringRef()),
70    Scale(1) {}
71  // Compund immediate expression
72  IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
73    Imm = imm;
74  }
75  // [Reg + ImmediateExpression]
76  // We don't bother to emit an immediate expression evaluated to zero
77  IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
78    bool needBracs = true) :
79    IntelExpr(imm, needBracs) {
80    IndexReg = reg;
81    if (scale)
82      Scale = scale;
83  }
84  // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
85  IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
86    int64_t imm = 0, bool needBracs = true) :
87    IntelExpr(indexReg, imm, scale, needBracs) {
88    BaseReg = baseReg;
89  }
90  bool hasBaseReg() const {
91    return BaseReg.size();
92  }
93  bool hasIndexReg() const {
94    return IndexReg.size();
95  }
96  bool hasRegs() const {
97    return hasBaseReg() || hasIndexReg();
98  }
99  bool isValid() const {
100    return (Scale == 1) ||
101           (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
102  }
103};
104
105struct AsmRewrite {
106  AsmRewriteKind Kind;
107  SMLoc Loc;
108  unsigned Len;
109  int64_t Val;
110  StringRef Label;
111  IntelExpr IntelExp;
112
113public:
114  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
115    : Kind(kind), Loc(loc), Len(len), Val(val) {}
116  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
117    : AsmRewrite(kind, loc, len) { Label = label; }
118  AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
119    : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
120};
121
122struct ParseInstructionInfo {
123  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
124
125  ParseInstructionInfo() = default;
126  ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
127    : AsmRewrites(rewrites) {}
128};
129
// Outcome of trying to match a single operand.
enum OperandMatchResultTy {
  // The operand matched successfully.
  MatchOperand_Success,
  // The operand did not match.
  MatchOperand_NoMatch,
  // The operand matched but had errors.
  MatchOperand_ParseFail
};
135
// A failed instruction match may have several encodings that came close to
// matching, and it is often ambiguous which one the programmer intended. To
// allow an error that mentions each of these "near-miss" encodings, one
// NearMissInfo describes a single such encoding together with the reason it
// did not match the parsed instruction.
class NearMissInfo {
public:
  enum NearMissKind {
    NoNearMiss,
    NearMissOperand,
    NearMissFeature,
    NearMissPredicate,
    NearMissTooFewOperands,
  };

  // Result for an encoding that is valid for the parsed assembly string. Only
  // used internally by the table-generated assembly matcher.
  static NearMissInfo getSuccess() { return NearMissInfo(); }

  // Result for an encoding rejected because it needs target features that
  // are not currently enabled. MissingFeatures has one bit set per feature
  // that the encoding requires but which is not enabled.
  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    NearMissInfo Info(NearMissFeature);
    Info.Features = MissingFeatures;
    return Info;
  }

  // Result for an encoding rejected because the target-specific predicate
  // function returned an error code. FailureCode is that target-specific
  // error code.
  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    NearMissInfo Info(NearMissPredicate);
    Info.PredicateError = FailureCode;
    return Info;
  }

  // Result for an encoding rejected because one (and only one) parsed operand
  // has the wrong type. OperandError is the error code relating to the
  // operand class the encoding expected, OperandClass is the type of the
  // expected operand, Opcode is the opcode of the encoding and OperandIndex
  // is the index into the parsed operand list.
  static NearMissInfo getMissedOperand(unsigned OperandError,
                                       unsigned OperandClass, unsigned Opcode,
                                       unsigned OperandIndex) {
    NearMissInfo Info(NearMissOperand);
    Info.MissedOperand.Class = OperandClass;
    Info.MissedOperand.Opcode = Opcode;
    Info.MissedOperand.Error = OperandError;
    Info.MissedOperand.Index = OperandIndex;
    return Info;
  }

  // Result for an encoding rejected because it expects more operands than
  // were parsed. OperandClass is the class of the expected operand that was
  // not provided; Opcode is the instruction encoding.
  static NearMissInfo getTooFewOperands(unsigned OperandClass,
                                        unsigned Opcode) {
    NearMissInfo Info(NearMissTooFewOperands);
    Info.TooFewOperands.Class = OperandClass;
    Info.TooFewOperands.Opcode = Opcode;
    return Info;
  }

  // True for every kind except NoNearMiss.
  operator bool() const { return getKind() != NoNearMiss; }

  NearMissKind getKind() const { return Kind; }

  // Feature flags required by the instruction that the current target does
  // not have. Only meaningful for NearMissFeature.
  uint64_t getFeatures() const {
    assert(Kind == NearMissFeature);
    return Features;
  }

  // Error code returned by the target predicate when validating this
  // instruction encoding. Only meaningful for NearMissPredicate.
  unsigned getPredicateError() const {
    assert(Kind == NearMissPredicate);
    return PredicateError;
  }

  // MatchClassKind of the operand that we expected to see.
  unsigned getOperandClass() const {
    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    // Both payload structs begin with Class, so either member yields the same
    // storage; read the one that the factory actually wrote.
    return Kind == NearMissTooFewOperands ? TooFewOperands.Class
                                          : MissedOperand.Class;
  }

  // Opcode of the encoding we were trying to match.
  unsigned getOpcode() const {
    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    // Opcode is the second field of both payload structs (see above).
    return Kind == NearMissTooFewOperands ? TooFewOperands.Opcode
                                          : MissedOperand.Opcode;
  }

  // Error code returned when validating the operand.
  unsigned getOperandError() const {
    assert(Kind == NearMissOperand);
    return MissedOperand.Error;
  }

  // Index of the actual operand we were trying to match in the list of parsed
  // operands.
  unsigned getOperandIndex() const {
    assert(Kind == NearMissOperand);
    return MissedOperand.Index;
  }

private:
  NearMissKind Kind;

  // MissedOpInfo and TooFewOperandsInfo share a common prefix (Class, then
  // Opcode), so those two fields occupy the same storage in the union below.
  struct MissedOpInfo {
    unsigned Class;
    unsigned Opcode;
    unsigned Error;
    unsigned Index;
  };

  struct TooFewOperandsInfo {
    unsigned Class;
    unsigned Opcode;
  };

  // Payload; which member is meaningful is determined by Kind.
  union {
    uint64_t Features;
    unsigned PredicateError;
    MissedOpInfo MissedOperand;
    TooFewOperandsInfo TooFewOperands;
  };

  // Instances are created only through the static factories above. The
  // payload is left for the factory to fill in.
  explicit NearMissInfo(NearMissKind K = NoNearMiss) : Kind(K) {}
};
268
269/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
270class MCTargetAsmParser : public MCAsmParserExtension {
271public:
272  enum MatchResultTy {
273    Match_InvalidOperand,
274    Match_MissingFeature,
275    Match_MnemonicFail,
276    Match_Success,
277    Match_NearMisses,
278    FIRST_TARGET_MATCH_RESULT_TY
279  };
280
281protected: // Can only create subclasses.
282  MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
283                    const MCInstrInfo &MII);
284
285  /// Create a copy of STI and return a non-const reference to it.
286  MCSubtargetInfo &copySTI();
287
288  /// AvailableFeatures - The current set of available features.
289  uint64_t AvailableFeatures = 0;
290
291  /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
292  bool ParsingInlineAsm = false;
293
294  /// SemaCallback - The Sema callback implementation.  Must be set when parsing
295  /// ms-style inline assembly.
296  MCAsmParserSemaCallback *SemaCallback;
297
298  /// Set of options which affects instrumentation of inline assembly.
299  MCTargetOptions MCOptions;
300
301  /// Current STI.
302  const MCSubtargetInfo *STI;
303
304  const MCInstrInfo &MII;
305
306public:
307  MCTargetAsmParser(const MCTargetAsmParser &) = delete;
308  MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
309
310  ~MCTargetAsmParser() override;
311
312  const MCSubtargetInfo &getSTI() const;
313
314  uint64_t getAvailableFeatures() const { return AvailableFeatures; }
315  void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
316
317  bool isParsingInlineAsm () { return ParsingInlineAsm; }
318  void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
319
320  MCTargetOptions getTargetOptions() const { return MCOptions; }
321
322  void setSemaCallback(MCAsmParserSemaCallback *Callback) {
323    SemaCallback = Callback;
324  }
325
326  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
327                             SMLoc &EndLoc) = 0;
328
329  /// Sets frame register corresponding to the current MachineFunction.
330  virtual void SetFrameRegister(unsigned RegNo) {}
331
332  /// ParseInstruction - Parse one assembly instruction.
333  ///
334  /// The parser is positioned following the instruction name. The target
335  /// specific instruction parser should parse the entire instruction and
336  /// construct the appropriate MCInst, or emit an error. On success, the entire
337  /// line should be parsed up to and including the end-of-statement token. On
338  /// failure, the parser is not required to read to the end of the line.
339  //
340  /// \param Name - The instruction name.
341  /// \param NameLoc - The source location of the name.
342  /// \param Operands [out] - The list of parsed operands, this returns
343  ///        ownership of them to the caller.
344  /// \return True on failure.
345  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
346                                SMLoc NameLoc, OperandVector &Operands) = 0;
347  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
348                                AsmToken Token, OperandVector &Operands) {
349    return ParseInstruction(Info, Name, Token.getLoc(), Operands);
350  }
351
352  /// ParseDirective - Parse a target specific assembler directive
353  ///
354  /// The parser is positioned following the directive name.  The target
355  /// specific directive parser should parse the entire directive doing or
356  /// recording any target specific work, or return true and do nothing if the
357  /// directive is not target specific. If the directive is specific for
358  /// the target, the entire line is parsed up to and including the
359  /// end-of-statement token and false is returned.
360  ///
361  /// \param DirectiveID - the identifier token of the directive.
362  virtual bool ParseDirective(AsmToken DirectiveID) = 0;
363
364  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
365  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
366  /// This returns false on success and returns true on failure to match.
367  ///
368  /// On failure, the target parser is responsible for emitting a diagnostic
369  /// explaining the match failure.
370  virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
371                                       OperandVector &Operands, MCStreamer &Out,
372                                       uint64_t &ErrorInfo,
373                                       bool MatchingInlineAsm) = 0;
374
375  /// Allows targets to let registers opt out of clobber lists.
376  virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
377
378  /// Allow a target to add special case operand matching for things that
379  /// tblgen doesn't/can't handle effectively. For example, literal
380  /// immediates on ARM. TableGen expects a token operand, but the parser
381  /// will recognize them as immediates.
382  virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
383                                              unsigned Kind) {
384    return Match_InvalidOperand;
385  }
386
387  /// Validate the instruction match against any complex target predicates
388  /// before rendering any operands to it.
389  virtual unsigned
390  checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
391    return Match_Success;
392  }
393
394  /// checkTargetMatchPredicate - Validate the instruction match against
395  /// any complex target predicates not expressible via match classes.
396  virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
397    return Match_Success;
398  }
399
400  virtual void convertToMapAndConstraints(unsigned Kind,
401                                          const OperandVector &Operands) = 0;
402
403  // Return whether this parser uses assignment statements with equals tokens
404  virtual bool equalIsAsmAssignment() { return true; };
405  // Return whether this start of statement identifier is a label
406  virtual bool isLabel(AsmToken &Token) { return true; };
407  // Return whether this parser accept star as start of statement
408  virtual bool starIsStartOfStatement() { return false; };
409
410  virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
411                                            MCSymbolRefExpr::VariantKind,
412                                            MCContext &Ctx) {
413    return nullptr;
414  }
415
416  virtual void onLabelParsed(MCSymbol *Symbol) {}
417
418  /// Ensure that all previously parsed instructions have been emitted to the
419  /// output streamer, if the target does not emit them immediately.
420  virtual void flushPendingInstructions(MCStreamer &Out) {}
421
422  virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
423                                              AsmToken::TokenKind OperatorToken,
424                                              MCContext &Ctx) {
425    return nullptr;
426  }
427};
428
429} // end namespace llvm
430
431#endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
432