1//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 12 13#include "llvm/ADT/StringRef.h" 14#include "llvm/MC/MCExpr.h" 15#include "llvm/MC/MCInstrInfo.h" 16#include "llvm/MC/MCParser/MCAsmLexer.h" 17#include "llvm/MC/MCParser/MCAsmParserExtension.h" 18#include "llvm/MC/MCTargetOptions.h" 19#include "llvm/Support/SMLoc.h" 20#include <cstdint> 21#include <memory> 22 23namespace llvm { 24 25class MCInst; 26class MCParsedAsmOperand; 27class MCStreamer; 28class MCSubtargetInfo; 29template <typename T> class SmallVectorImpl; 30 31using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 32 33enum AsmRewriteKind { 34 AOK_Align, // Rewrite align as .align. 35 AOK_EVEN, // Rewrite even as .even. 36 AOK_Emit, // Rewrite _emit as .byte. 37 AOK_Input, // Rewrite in terms of $N. 38 AOK_Output, // Rewrite in terms of $N. 39 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 40 AOK_Label, // Rewrite local labels. 41 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 42 AOK_Skip, // Skip emission (e.g., offset/type operators). 43 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 44}; 45 46const char AsmRewritePrecedence [] = { 47 2, // AOK_Align 48 2, // AOK_EVEN 49 2, // AOK_Emit 50 3, // AOK_Input 51 3, // AOK_Output 52 5, // AOK_SizeDirective 53 1, // AOK_Label 54 5, // AOK_EndOfStatement 55 2, // AOK_Skip 56 2 // AOK_IntelExpr 57}; 58 59// Represnt the various parts which makes up an intel expression, 60// used for emitting compound intel expressions 61struct IntelExpr { 62 bool NeedBracs; 63 int64_t Imm; 64 StringRef BaseReg; 65 StringRef IndexReg; 66 unsigned Scale; 67 68 IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0), 69 BaseReg(StringRef()), IndexReg(StringRef()), 70 Scale(1) {} 71 // Compund immediate expression 72 IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) { 73 Imm = imm; 74 } 75 // [Reg + ImmediateExpression] 76 // We don't bother to emit an immediate expression evaluated to zero 77 IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0, 78 bool needBracs = true) : 79 IntelExpr(imm, needBracs) { 80 IndexReg = reg; 81 if (scale) 82 Scale = scale; 83 } 84 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression] 85 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0, 86 int64_t imm = 0, bool needBracs = true) : 87 IntelExpr(indexReg, imm, scale, needBracs) { 88 BaseReg = baseReg; 89 } 90 bool hasBaseReg() const { 91 return BaseReg.size(); 92 } 93 bool hasIndexReg() const { 94 return IndexReg.size(); 95 } 96 bool hasRegs() const { 97 return hasBaseReg() || hasIndexReg(); 98 } 99 bool isValid() const { 100 return (Scale == 1) || 101 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 102 } 103}; 104 105struct AsmRewrite { 106 AsmRewriteKind Kind; 107 SMLoc Loc; 108 unsigned Len; 109 int64_t Val; 110 StringRef Label; 111 IntelExpr IntelExp; 112 113public: 114 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) 115 : Kind(kind), Loc(loc), Len(len), Val(val) {} 116 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 117 : AsmRewrite(kind, loc, len) { Label = label; } 118 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 119 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 120}; 121 122struct ParseInstructionInfo { 123 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 124 125 ParseInstructionInfo() = default; 126 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 127 : AsmRewrites(rewrites) {} 128}; 129 130enum OperandMatchResultTy { 131 MatchOperand_Success, // operand matched successfully 132 MatchOperand_NoMatch, // operand did not match 133 MatchOperand_ParseFail // operand matched but had errors 134}; 135 136// When matching of an assembly instruction fails, there may be multiple 137// encodings that are close to being a match. It's often ambiguous which one 138// the programmer intended to use, so we want to report an error which mentions 139// each of these "near-miss" encodings. This struct contains information about 140// one such encoding, and why it did not match the parsed instruction. 141class NearMissInfo { 142public: 143 enum NearMissKind { 144 NoNearMiss, 145 NearMissOperand, 146 NearMissFeature, 147 NearMissPredicate, 148 NearMissTooFewOperands, 149 }; 150 151 // The encoding is valid for the parsed assembly string. This is only used 152 // internally to the table-generated assembly matcher. 153 static NearMissInfo getSuccess() { return NearMissInfo(); } 154 155 // The instruction encoding is not valid because it requires some target 156 // features that are not currently enabled. MissingFeatures has a bit set for 157 // each feature that the encoding needs but which is not enabled. 158 static NearMissInfo getMissedFeature(uint64_t MissingFeatures) { 159 NearMissInfo Result; 160 Result.Kind = NearMissFeature; 161 Result.Features = MissingFeatures; 162 return Result; 163 } 164 165 // The instruction encoding is not valid because the target-specific 166 // predicate function returned an error code. FailureCode is the 167 // target-specific error code returned by the predicate. 168 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 169 NearMissInfo Result; 170 Result.Kind = NearMissPredicate; 171 Result.PredicateError = FailureCode; 172 return Result; 173 } 174 175 // The instruction encoding is not valid because one (and only one) parsed 176 // operand is not of the correct type. OperandError is the error code 177 // relating to the operand class expected by the encoding. OperandClass is 178 // the type of the expected operand. Opcode is the opcode of the encoding. 179 // OperandIndex is the index into the parsed operand list. 180 static NearMissInfo getMissedOperand(unsigned OperandError, 181 unsigned OperandClass, unsigned Opcode, 182 unsigned OperandIndex) { 183 NearMissInfo Result; 184 Result.Kind = NearMissOperand; 185 Result.MissedOperand.Error = OperandError; 186 Result.MissedOperand.Class = OperandClass; 187 Result.MissedOperand.Opcode = Opcode; 188 Result.MissedOperand.Index = OperandIndex; 189 return Result; 190 } 191 192 // The instruction encoding is not valid because it expects more operands 193 // than were parsed. OperandClass is the class of the expected operand that 194 // was not provided. Opcode is the instruction encoding. 195 static NearMissInfo getTooFewOperands(unsigned OperandClass, 196 unsigned Opcode) { 197 NearMissInfo Result; 198 Result.Kind = NearMissTooFewOperands; 199 Result.TooFewOperands.Class = OperandClass; 200 Result.TooFewOperands.Opcode = Opcode; 201 return Result; 202 } 203 204 operator bool() const { return Kind != NoNearMiss; } 205 206 NearMissKind getKind() const { return Kind; } 207 208 // Feature flags required by the instruction, that the current target does 209 // not have. 210 uint64_t getFeatures() const { 211 assert(Kind == NearMissFeature); 212 return Features; 213 } 214 // Error code returned by the target predicate when validating this 215 // instruction encoding. 216 unsigned getPredicateError() const { 217 assert(Kind == NearMissPredicate); 218 return PredicateError; 219 } 220 // MatchClassKind of the operand that we expected to see. 221 unsigned getOperandClass() const { 222 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 223 return MissedOperand.Class; 224 } 225 // Opcode of the encoding we were trying to match. 226 unsigned getOpcode() const { 227 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 228 return MissedOperand.Opcode; 229 } 230 // Error code returned when validating the operand. 231 unsigned getOperandError() const { 232 assert(Kind == NearMissOperand); 233 return MissedOperand.Error; 234 } 235 // Index of the actual operand we were trying to match in the list of parsed 236 // operands. 237 unsigned getOperandIndex() const { 238 assert(Kind == NearMissOperand); 239 return MissedOperand.Index; 240 } 241 242private: 243 NearMissKind Kind; 244 245 // These two structs share a common prefix, so we can safely rely on the fact 246 // that they overlap in the union. 247 struct MissedOpInfo { 248 unsigned Class; 249 unsigned Opcode; 250 unsigned Error; 251 unsigned Index; 252 }; 253 254 struct TooFewOperandsInfo { 255 unsigned Class; 256 unsigned Opcode; 257 }; 258 259 union { 260 uint64_t Features; 261 unsigned PredicateError; 262 MissedOpInfo MissedOperand; 263 TooFewOperandsInfo TooFewOperands; 264 }; 265 266 NearMissInfo() : Kind(NoNearMiss) {} 267}; 268 269/// MCTargetAsmParser - Generic interface to target specific assembly parsers. 270class MCTargetAsmParser : public MCAsmParserExtension { 271public: 272 enum MatchResultTy { 273 Match_InvalidOperand, 274 Match_MissingFeature, 275 Match_MnemonicFail, 276 Match_Success, 277 Match_NearMisses, 278 FIRST_TARGET_MATCH_RESULT_TY 279 }; 280 281protected: // Can only create subclasses. 282 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 283 const MCInstrInfo &MII); 284 285 /// Create a copy of STI and return a non-const reference to it. 286 MCSubtargetInfo ©STI(); 287 288 /// AvailableFeatures - The current set of available features. 289 uint64_t AvailableFeatures = 0; 290 291 /// ParsingInlineAsm - Are we parsing ms-style inline assembly? 292 bool ParsingInlineAsm = false; 293 294 /// SemaCallback - The Sema callback implementation. Must be set when parsing 295 /// ms-style inline assembly. 296 MCAsmParserSemaCallback *SemaCallback; 297 298 /// Set of options which affects instrumentation of inline assembly. 299 MCTargetOptions MCOptions; 300 301 /// Current STI. 302 const MCSubtargetInfo *STI; 303 304 const MCInstrInfo &MII; 305 306public: 307 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 308 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 309 310 ~MCTargetAsmParser() override; 311 312 const MCSubtargetInfo &getSTI() const; 313 314 uint64_t getAvailableFeatures() const { return AvailableFeatures; } 315 void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; } 316 317 bool isParsingInlineAsm () { return ParsingInlineAsm; } 318 void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; } 319 320 MCTargetOptions getTargetOptions() const { return MCOptions; } 321 322 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 323 SemaCallback = Callback; 324 } 325 326 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 327 SMLoc &EndLoc) = 0; 328 329 /// Sets frame register corresponding to the current MachineFunction. 330 virtual void SetFrameRegister(unsigned RegNo) {} 331 332 /// ParseInstruction - Parse one assembly instruction. 333 /// 334 /// The parser is positioned following the instruction name. The target 335 /// specific instruction parser should parse the entire instruction and 336 /// construct the appropriate MCInst, or emit an error. On success, the entire 337 /// line should be parsed up to and including the end-of-statement token. On 338 /// failure, the parser is not required to read to the end of the line. 339 // 340 /// \param Name - The instruction name. 341 /// \param NameLoc - The source location of the name. 342 /// \param Operands [out] - The list of parsed operands, this returns 343 /// ownership of them to the caller. 344 /// \return True on failure. 345 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 346 SMLoc NameLoc, OperandVector &Operands) = 0; 347 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 348 AsmToken Token, OperandVector &Operands) { 349 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 350 } 351 352 /// ParseDirective - Parse a target specific assembler directive 353 /// 354 /// The parser is positioned following the directive name. The target 355 /// specific directive parser should parse the entire directive doing or 356 /// recording any target specific work, or return true and do nothing if the 357 /// directive is not target specific. If the directive is specific for 358 /// the target, the entire line is parsed up to and including the 359 /// end-of-statement token and false is returned. 360 /// 361 /// \param DirectiveID - the identifier token of the directive. 362 virtual bool ParseDirective(AsmToken DirectiveID) = 0; 363 364 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 365 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 366 /// This returns false on success and returns true on failure to match. 367 /// 368 /// On failure, the target parser is responsible for emitting a diagnostic 369 /// explaining the match failure. 370 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 371 OperandVector &Operands, MCStreamer &Out, 372 uint64_t &ErrorInfo, 373 bool MatchingInlineAsm) = 0; 374 375 /// Allows targets to let registers opt out of clobber lists. 376 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 377 378 /// Allow a target to add special case operand matching for things that 379 /// tblgen doesn't/can't handle effectively. For example, literal 380 /// immediates on ARM. TableGen expects a token operand, but the parser 381 /// will recognize them as immediates. 382 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 383 unsigned Kind) { 384 return Match_InvalidOperand; 385 } 386 387 /// Validate the instruction match against any complex target predicates 388 /// before rendering any operands to it. 389 virtual unsigned 390 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 391 return Match_Success; 392 } 393 394 /// checkTargetMatchPredicate - Validate the instruction match against 395 /// any complex target predicates not expressible via match classes. 396 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 397 return Match_Success; 398 } 399 400 virtual void convertToMapAndConstraints(unsigned Kind, 401 const OperandVector &Operands) = 0; 402 403 // Return whether this parser uses assignment statements with equals tokens 404 virtual bool equalIsAsmAssignment() { return true; }; 405 // Return whether this start of statement identifier is a label 406 virtual bool isLabel(AsmToken &Token) { return true; }; 407 // Return whether this parser accept star as start of statement 408 virtual bool starIsStartOfStatement() { return false; }; 409 410 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 411 MCSymbolRefExpr::VariantKind, 412 MCContext &Ctx) { 413 return nullptr; 414 } 415 416 virtual void onLabelParsed(MCSymbol *Symbol) {} 417 418 /// Ensure that all previously parsed instructions have been emitted to the 419 /// output streamer, if the target does not emit them immediately. 420 virtual void flushPendingInstructions(MCStreamer &Out) {} 421 422 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 423 AsmToken::TokenKind OperatorToken, 424 MCContext &Ctx) { 425 return nullptr; 426 } 427}; 428 429} // end namespace llvm 430 431#endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 432