X86AsmParser.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "MCTargetDesc/X86BaseInfo.h" 11#include "X86AsmInstrumentation.h" 12#include "X86AsmParserCommon.h" 13#include "X86Operand.h" 14#include "llvm/ADT/APFloat.h" 15#include "llvm/ADT/STLExtras.h" 16#include "llvm/ADT/SmallString.h" 17#include "llvm/ADT/SmallVector.h" 18#include "llvm/ADT/StringSwitch.h" 19#include "llvm/ADT/Twine.h" 20#include "llvm/MC/MCContext.h" 21#include "llvm/MC/MCExpr.h" 22#include "llvm/MC/MCInst.h" 23#include "llvm/MC/MCParser/MCAsmLexer.h" 24#include "llvm/MC/MCParser/MCAsmParser.h" 25#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 26#include "llvm/MC/MCRegisterInfo.h" 27#include "llvm/MC/MCStreamer.h" 28#include "llvm/MC/MCSubtargetInfo.h" 29#include "llvm/MC/MCSymbol.h" 30#include "llvm/MC/MCTargetAsmParser.h" 31#include "llvm/Support/SourceMgr.h" 32#include "llvm/Support/TargetRegistry.h" 33#include "llvm/Support/raw_ostream.h" 34#include <memory> 35 36using namespace llvm; 37 38namespace { 39 40static const char OpPrecedence[] = { 41 0, // IC_OR 42 1, // IC_AND 43 2, // IC_LSHIFT 44 2, // IC_RSHIFT 45 3, // IC_PLUS 46 3, // IC_MINUS 47 4, // IC_MULTIPLY 48 4, // IC_DIVIDE 49 5, // IC_RPAREN 50 6, // IC_LPAREN 51 0, // IC_IMM 52 0 // IC_REGISTER 53}; 54 55class X86AsmParser : public MCTargetAsmParser { 56 MCSubtargetInfo &STI; 57 MCAsmParser &Parser; 58 ParseInstructionInfo *InstInfo; 59 std::unique_ptr<X86AsmInstrumentation> Instrumentation; 60private: 61 SMLoc consumeToken() { 62 SMLoc Result = Parser.getTok().getLoc(); 63 Parser.Lex(); 64 return Result; 65 } 66 67 enum InfixCalculatorTok { 68 IC_OR = 0, 69 IC_AND, 70 IC_LSHIFT, 71 IC_RSHIFT, 72 IC_PLUS, 73 IC_MINUS, 74 IC_MULTIPLY, 75 IC_DIVIDE, 76 IC_RPAREN, 77 IC_LPAREN, 78 IC_IMM, 79 IC_REGISTER 80 }; 81 82 class InfixCalculator { 83 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 84 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 85 SmallVector<ICToken, 4> PostfixStack; 86 87 public: 88 int64_t popOperand() { 89 assert (!PostfixStack.empty() && "Poped an empty stack!"); 90 ICToken Op = PostfixStack.pop_back_val(); 91 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) 92 && "Expected and immediate or register!"); 93 return Op.second; 94 } 95 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 96 assert ((Op == IC_IMM || Op == IC_REGISTER) && 97 "Unexpected operand!"); 98 PostfixStack.push_back(std::make_pair(Op, Val)); 99 } 100 101 void popOperator() { InfixOperatorStack.pop_back(); } 102 void pushOperator(InfixCalculatorTok Op) { 103 // Push the new operator if the stack is empty. 104 if (InfixOperatorStack.empty()) { 105 InfixOperatorStack.push_back(Op); 106 return; 107 } 108 109 // Push the new operator if it has a higher precedence than the operator 110 // on the top of the stack or the operator on the top of the stack is a 111 // left parentheses. 112 unsigned Idx = InfixOperatorStack.size() - 1; 113 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 114 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 115 InfixOperatorStack.push_back(Op); 116 return; 117 } 118 119 // The operator on the top of the stack has higher precedence than the 120 // new operator. 121 unsigned ParenCount = 0; 122 while (1) { 123 // Nothing to process. 124 if (InfixOperatorStack.empty()) 125 break; 126 127 Idx = InfixOperatorStack.size() - 1; 128 StackOp = InfixOperatorStack[Idx]; 129 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 130 break; 131 132 // If we have an even parentheses count and we see a left parentheses, 133 // then stop processing. 134 if (!ParenCount && StackOp == IC_LPAREN) 135 break; 136 137 if (StackOp == IC_RPAREN) { 138 ++ParenCount; 139 InfixOperatorStack.pop_back(); 140 } else if (StackOp == IC_LPAREN) { 141 --ParenCount; 142 InfixOperatorStack.pop_back(); 143 } else { 144 InfixOperatorStack.pop_back(); 145 PostfixStack.push_back(std::make_pair(StackOp, 0)); 146 } 147 } 148 // Push the new operator. 149 InfixOperatorStack.push_back(Op); 150 } 151 int64_t execute() { 152 // Push any remaining operators onto the postfix stack. 153 while (!InfixOperatorStack.empty()) { 154 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 155 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 156 PostfixStack.push_back(std::make_pair(StackOp, 0)); 157 } 158 159 if (PostfixStack.empty()) 160 return 0; 161 162 SmallVector<ICToken, 16> OperandStack; 163 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 164 ICToken Op = PostfixStack[i]; 165 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 166 OperandStack.push_back(Op); 167 } else { 168 assert (OperandStack.size() > 1 && "Too few operands."); 169 int64_t Val; 170 ICToken Op2 = OperandStack.pop_back_val(); 171 ICToken Op1 = OperandStack.pop_back_val(); 172 switch (Op.first) { 173 default: 174 report_fatal_error("Unexpected operator!"); 175 break; 176 case IC_PLUS: 177 Val = Op1.second + Op2.second; 178 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 179 break; 180 case IC_MINUS: 181 Val = Op1.second - Op2.second; 182 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 183 break; 184 case IC_MULTIPLY: 185 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 186 "Multiply operation with an immediate and a register!"); 187 Val = Op1.second * Op2.second; 188 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 189 break; 190 case IC_DIVIDE: 191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 192 "Divide operation with an immediate and a register!"); 193 assert (Op2.second != 0 && "Division by zero!"); 194 Val = Op1.second / Op2.second; 195 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 196 break; 197 case IC_OR: 198 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 199 "Or operation with an immediate and a register!"); 200 Val = Op1.second | Op2.second; 201 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 202 break; 203 case IC_AND: 204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 205 "And operation with an immediate and a register!"); 206 Val = Op1.second & Op2.second; 207 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 208 break; 209 case IC_LSHIFT: 210 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 211 "Left shift operation with an immediate and a register!"); 212 Val = Op1.second << Op2.second; 213 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 214 break; 215 case IC_RSHIFT: 216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 217 "Right shift operation with an immediate and a register!"); 218 Val = Op1.second >> Op2.second; 219 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 220 break; 221 } 222 } 223 } 224 assert (OperandStack.size() == 1 && "Expected a single result."); 225 return OperandStack.pop_back_val().second; 226 } 227 }; 228 229 enum IntelExprState { 230 IES_OR, 231 IES_AND, 232 IES_LSHIFT, 233 IES_RSHIFT, 234 IES_PLUS, 235 IES_MINUS, 236 IES_MULTIPLY, 237 IES_DIVIDE, 238 IES_LBRAC, 239 IES_RBRAC, 240 IES_LPAREN, 241 IES_RPAREN, 242 IES_REGISTER, 243 IES_INTEGER, 244 IES_IDENTIFIER, 245 IES_ERROR 246 }; 247 248 class IntelExprStateMachine { 249 IntelExprState State, PrevState; 250 unsigned BaseReg, IndexReg, TmpReg, Scale; 251 int64_t Imm; 252 const MCExpr *Sym; 253 StringRef SymName; 254 bool StopOnLBrac, AddImmPrefix; 255 InfixCalculator IC; 256 InlineAsmIdentifierInfo Info; 257 public: 258 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : 259 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), 260 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac), 261 AddImmPrefix(addimmprefix) { Info.clear(); } 262 263 unsigned getBaseReg() { return BaseReg; } 264 unsigned getIndexReg() { return IndexReg; } 265 unsigned getScale() { return Scale; } 266 const MCExpr *getSym() { return Sym; } 267 StringRef getSymName() { return SymName; } 268 int64_t getImm() { return Imm + IC.execute(); } 269 bool isValidEndState() { 270 return State == IES_RBRAC || State == IES_INTEGER; 271 } 272 bool getStopOnLBrac() { return StopOnLBrac; } 273 bool getAddImmPrefix() { return AddImmPrefix; } 274 bool hadError() { return State == IES_ERROR; } 275 276 InlineAsmIdentifierInfo &getIdentifierInfo() { 277 return Info; 278 } 279 280 void onOr() { 281 IntelExprState CurrState = State; 282 switch (State) { 283 default: 284 State = IES_ERROR; 285 break; 286 case IES_INTEGER: 287 case IES_RPAREN: 288 case IES_REGISTER: 289 State = IES_OR; 290 IC.pushOperator(IC_OR); 291 break; 292 } 293 PrevState = CurrState; 294 } 295 void onAnd() { 296 IntelExprState CurrState = State; 297 switch (State) { 298 default: 299 State = IES_ERROR; 300 break; 301 case IES_INTEGER: 302 case IES_RPAREN: 303 case IES_REGISTER: 304 State = IES_AND; 305 IC.pushOperator(IC_AND); 306 break; 307 } 308 PrevState = CurrState; 309 } 310 void onLShift() { 311 IntelExprState CurrState = State; 312 switch (State) { 313 default: 314 State = IES_ERROR; 315 break; 316 case IES_INTEGER: 317 case IES_RPAREN: 318 case IES_REGISTER: 319 State = IES_LSHIFT; 320 IC.pushOperator(IC_LSHIFT); 321 break; 322 } 323 PrevState = CurrState; 324 } 325 void onRShift() { 326 IntelExprState CurrState = State; 327 switch (State) { 328 default: 329 State = IES_ERROR; 330 break; 331 case IES_INTEGER: 332 case IES_RPAREN: 333 case IES_REGISTER: 334 State = IES_RSHIFT; 335 IC.pushOperator(IC_RSHIFT); 336 break; 337 } 338 PrevState = CurrState; 339 } 340 void onPlus() { 341 IntelExprState CurrState = State; 342 switch (State) { 343 default: 344 State = IES_ERROR; 345 break; 346 case IES_INTEGER: 347 case IES_RPAREN: 348 case IES_REGISTER: 349 State = IES_PLUS; 350 IC.pushOperator(IC_PLUS); 351 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 352 // If we already have a BaseReg, then assume this is the IndexReg with 353 // a scale of 1. 354 if (!BaseReg) { 355 BaseReg = TmpReg; 356 } else { 357 assert (!IndexReg && "BaseReg/IndexReg already set!"); 358 IndexReg = TmpReg; 359 Scale = 1; 360 } 361 } 362 break; 363 } 364 PrevState = CurrState; 365 } 366 void onMinus() { 367 IntelExprState CurrState = State; 368 switch (State) { 369 default: 370 State = IES_ERROR; 371 break; 372 case IES_PLUS: 373 case IES_MULTIPLY: 374 case IES_DIVIDE: 375 case IES_LPAREN: 376 case IES_RPAREN: 377 case IES_LBRAC: 378 case IES_RBRAC: 379 case IES_INTEGER: 380 case IES_REGISTER: 381 State = IES_MINUS; 382 // Only push the minus operator if it is not a unary operator. 383 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || 384 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || 385 CurrState == IES_LPAREN || CurrState == IES_LBRAC)) 386 IC.pushOperator(IC_MINUS); 387 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 388 // If we already have a BaseReg, then assume this is the IndexReg with 389 // a scale of 1. 390 if (!BaseReg) { 391 BaseReg = TmpReg; 392 } else { 393 assert (!IndexReg && "BaseReg/IndexReg already set!"); 394 IndexReg = TmpReg; 395 Scale = 1; 396 } 397 } 398 break; 399 } 400 PrevState = CurrState; 401 } 402 void onRegister(unsigned Reg) { 403 IntelExprState CurrState = State; 404 switch (State) { 405 default: 406 State = IES_ERROR; 407 break; 408 case IES_PLUS: 409 case IES_LPAREN: 410 State = IES_REGISTER; 411 TmpReg = Reg; 412 IC.pushOperand(IC_REGISTER); 413 break; 414 case IES_MULTIPLY: 415 // Index Register - Scale * Register 416 if (PrevState == IES_INTEGER) { 417 assert (!IndexReg && "IndexReg already set!"); 418 State = IES_REGISTER; 419 IndexReg = Reg; 420 // Get the scale and replace the 'Scale * Register' with '0'. 421 Scale = IC.popOperand(); 422 IC.pushOperand(IC_IMM); 423 IC.popOperator(); 424 } else { 425 State = IES_ERROR; 426 } 427 break; 428 } 429 PrevState = CurrState; 430 } 431 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { 432 PrevState = State; 433 switch (State) { 434 default: 435 State = IES_ERROR; 436 break; 437 case IES_PLUS: 438 case IES_MINUS: 439 State = IES_INTEGER; 440 Sym = SymRef; 441 SymName = SymRefName; 442 IC.pushOperand(IC_IMM); 443 break; 444 } 445 } 446 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { 447 IntelExprState CurrState = State; 448 switch (State) { 449 default: 450 State = IES_ERROR; 451 break; 452 case IES_PLUS: 453 case IES_MINUS: 454 case IES_OR: 455 case IES_AND: 456 case IES_LSHIFT: 457 case IES_RSHIFT: 458 case IES_DIVIDE: 459 case IES_MULTIPLY: 460 case IES_LPAREN: 461 State = IES_INTEGER; 462 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 463 // Index Register - Register * Scale 464 assert (!IndexReg && "IndexReg already set!"); 465 IndexReg = TmpReg; 466 Scale = TmpInt; 467 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { 468 ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; 469 return true; 470 } 471 // Get the scale and replace the 'Register * Scale' with '0'. 472 IC.popOperator(); 473 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 474 PrevState == IES_OR || PrevState == IES_AND || 475 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 476 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 477 PrevState == IES_LPAREN || PrevState == IES_LBRAC) && 478 CurrState == IES_MINUS) { 479 // Unary minus. No need to pop the minus operand because it was never 480 // pushed. 481 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 482 } else { 483 IC.pushOperand(IC_IMM, TmpInt); 484 } 485 break; 486 } 487 PrevState = CurrState; 488 return false; 489 } 490 void onStar() { 491 PrevState = State; 492 switch (State) { 493 default: 494 State = IES_ERROR; 495 break; 496 case IES_INTEGER: 497 case IES_REGISTER: 498 case IES_RPAREN: 499 State = IES_MULTIPLY; 500 IC.pushOperator(IC_MULTIPLY); 501 break; 502 } 503 } 504 void onDivide() { 505 PrevState = State; 506 switch (State) { 507 default: 508 State = IES_ERROR; 509 break; 510 case IES_INTEGER: 511 case IES_RPAREN: 512 State = IES_DIVIDE; 513 IC.pushOperator(IC_DIVIDE); 514 break; 515 } 516 } 517 void onLBrac() { 518 PrevState = State; 519 switch (State) { 520 default: 521 State = IES_ERROR; 522 break; 523 case IES_RBRAC: 524 State = IES_PLUS; 525 IC.pushOperator(IC_PLUS); 526 break; 527 } 528 } 529 void onRBrac() { 530 IntelExprState CurrState = State; 531 switch (State) { 532 default: 533 State = IES_ERROR; 534 break; 535 case IES_INTEGER: 536 case IES_REGISTER: 537 case IES_RPAREN: 538 State = IES_RBRAC; 539 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 540 // If we already have a BaseReg, then assume this is the IndexReg with 541 // a scale of 1. 542 if (!BaseReg) { 543 BaseReg = TmpReg; 544 } else { 545 assert (!IndexReg && "BaseReg/IndexReg already set!"); 546 IndexReg = TmpReg; 547 Scale = 1; 548 } 549 } 550 break; 551 } 552 PrevState = CurrState; 553 } 554 void onLParen() { 555 IntelExprState CurrState = State; 556 switch (State) { 557 default: 558 State = IES_ERROR; 559 break; 560 case IES_PLUS: 561 case IES_MINUS: 562 case IES_OR: 563 case IES_AND: 564 case IES_LSHIFT: 565 case IES_RSHIFT: 566 case IES_MULTIPLY: 567 case IES_DIVIDE: 568 case IES_LPAREN: 569 // FIXME: We don't handle this type of unary minus, yet. 570 if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 571 PrevState == IES_OR || PrevState == IES_AND || 572 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 573 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 574 PrevState == IES_LPAREN || PrevState == IES_LBRAC) && 575 CurrState == IES_MINUS) { 576 State = IES_ERROR; 577 break; 578 } 579 State = IES_LPAREN; 580 IC.pushOperator(IC_LPAREN); 581 break; 582 } 583 PrevState = CurrState; 584 } 585 void onRParen() { 586 PrevState = State; 587 switch (State) { 588 default: 589 State = IES_ERROR; 590 break; 591 case IES_INTEGER: 592 case IES_REGISTER: 593 case IES_RPAREN: 594 State = IES_RPAREN; 595 IC.pushOperator(IC_RPAREN); 596 break; 597 } 598 } 599 }; 600 601 MCAsmParser &getParser() const { return Parser; } 602 603 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 604 605 bool Error(SMLoc L, const Twine &Msg, 606 ArrayRef<SMRange> Ranges = None, 607 bool MatchingInlineAsm = false) { 608 if (MatchingInlineAsm) return true; 609 return Parser.Error(L, Msg, Ranges); 610 } 611 612 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg, 613 ArrayRef<SMRange> Ranges = None, 614 bool MatchingInlineAsm = false) { 615 Parser.eatToEndOfStatement(); 616 return Error(L, Msg, Ranges, MatchingInlineAsm); 617 } 618 619 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { 620 Error(Loc, Msg); 621 return 0; 622 } 623 624 X86Operand *DefaultMemSIOperand(SMLoc Loc); 625 X86Operand *DefaultMemDIOperand(SMLoc Loc); 626 X86Operand *ParseOperand(); 627 X86Operand *ParseATTOperand(); 628 X86Operand *ParseIntelOperand(); 629 X86Operand *ParseIntelOffsetOfOperator(); 630 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); 631 X86Operand *ParseIntelOperator(unsigned OpKind); 632 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); 633 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, 634 unsigned Size); 635 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 636 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 637 int64_t ImmDisp, unsigned Size); 638 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, 639 InlineAsmIdentifierInfo &Info, 640 bool IsUnevaluatedOperand, SMLoc &End); 641 642 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 643 644 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, 645 unsigned BaseReg, unsigned IndexReg, 646 unsigned Scale, SMLoc Start, SMLoc End, 647 unsigned Size, StringRef Identifier, 648 InlineAsmIdentifierInfo &Info); 649 650 bool ParseDirectiveWord(unsigned Size, SMLoc L); 651 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 652 653 bool processInstruction(MCInst &Inst, 654 const SmallVectorImpl<MCParsedAsmOperand*> &Ops); 655 656 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds 657 /// instrumentation around Inst. 658 void EmitInstruction(MCInst &Inst, 659 SmallVectorImpl<MCParsedAsmOperand *> &Operands, 660 MCStreamer &Out); 661 662 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 663 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 664 MCStreamer &Out, unsigned &ErrorInfo, 665 bool MatchingInlineAsm) override; 666 667 /// doSrcDstMatch - Returns true if operands are matching in their 668 /// word size (%si and %di, %esi and %edi, etc.). Order depends on 669 /// the parsing mode (Intel vs. AT&T). 670 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2); 671 672 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) 673 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 674 /// \return \c true if no parsing errors occurred, \c false otherwise. 675 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, 676 const MCParsedAsmOperand &Op); 677 678 bool is64BitMode() const { 679 // FIXME: Can tablegen auto-generate this? 680 return (STI.getFeatureBits() & X86::Mode64Bit) != 0; 681 } 682 bool is32BitMode() const { 683 // FIXME: Can tablegen auto-generate this? 684 return (STI.getFeatureBits() & X86::Mode32Bit) != 0; 685 } 686 bool is16BitMode() const { 687 // FIXME: Can tablegen auto-generate this? 688 return (STI.getFeatureBits() & X86::Mode16Bit) != 0; 689 } 690 void SwitchMode(uint64_t mode) { 691 uint64_t oldMode = STI.getFeatureBits() & 692 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit); 693 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode)); 694 setAvailableFeatures(FB); 695 assert(mode == (STI.getFeatureBits() & 696 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit))); 697 } 698 699 bool isParsingIntelSyntax() { 700 return getParser().getAssemblerDialect(); 701 } 702 703 /// @name Auto-generated Matcher Functions 704 /// { 705 706#define GET_ASSEMBLER_HEADER 707#include "X86GenAsmMatcher.inc" 708 709 /// } 710 711public: 712 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, 713 const MCInstrInfo &MII) 714 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { 715 716 // Initialize the set of available features. 717 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 718 Instrumentation.reset(CreateX86AsmInstrumentation(STI)); 719 } 720 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 721 722 bool 723 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, 724 SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; 725 726 bool ParseDirective(AsmToken DirectiveID) override; 727}; 728} // end anonymous namespace 729 730/// @name Auto-generated Match Functions 731/// { 732 733static unsigned MatchRegisterName(StringRef Name); 734 735/// } 736 737static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, 738 StringRef &ErrMsg) { 739 // If we have both a base register and an index register make sure they are 740 // both 64-bit or 32-bit registers. 741 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 742 if (BaseReg != 0 && IndexReg != 0) { 743 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 744 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 745 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 746 IndexReg != X86::RIZ) { 747 ErrMsg = "base register is 64-bit, but index register is not"; 748 return true; 749 } 750 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 751 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 752 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 753 IndexReg != X86::EIZ){ 754 ErrMsg = "base register is 32-bit, but index register is not"; 755 return true; 756 } 757 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 758 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 759 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 760 ErrMsg = "base register is 16-bit, but index register is not"; 761 return true; 762 } 763 if (((BaseReg == X86::BX || BaseReg == X86::BP) && 764 IndexReg != X86::SI && IndexReg != X86::DI) || 765 ((BaseReg == X86::SI || BaseReg == X86::DI) && 766 IndexReg != X86::BX && IndexReg != X86::BP)) { 767 ErrMsg = "invalid 16-bit base/index register combination"; 768 return true; 769 } 770 } 771 } 772 return false; 773} 774 775bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2) 776{ 777 // Return true and let a normal complaint about bogus operands happen. 778 if (!Op1.isMem() || !Op2.isMem()) 779 return true; 780 781 // Actually these might be the other way round if Intel syntax is 782 // being used. It doesn't matter. 783 unsigned diReg = Op1.Mem.BaseReg; 784 unsigned siReg = Op2.Mem.BaseReg; 785 786 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg)) 787 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg); 788 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg)) 789 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg); 790 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg)) 791 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg); 792 // Again, return true and let another error happen. 793 return true; 794} 795 796bool X86AsmParser::ParseRegister(unsigned &RegNo, 797 SMLoc &StartLoc, SMLoc &EndLoc) { 798 RegNo = 0; 799 const AsmToken &PercentTok = Parser.getTok(); 800 StartLoc = PercentTok.getLoc(); 801 802 // If we encounter a %, ignore it. This code handles registers with and 803 // without the prefix, unprefixed registers can occur in cfi directives. 804 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 805 Parser.Lex(); // Eat percent token. 806 807 const AsmToken &Tok = Parser.getTok(); 808 EndLoc = Tok.getEndLoc(); 809 810 if (Tok.isNot(AsmToken::Identifier)) { 811 if (isParsingIntelSyntax()) return true; 812 return Error(StartLoc, "invalid register name", 813 SMRange(StartLoc, EndLoc)); 814 } 815 816 RegNo = MatchRegisterName(Tok.getString()); 817 818 // If the match failed, try the register name as lowercase. 819 if (RegNo == 0) 820 RegNo = MatchRegisterName(Tok.getString().lower()); 821 822 if (!is64BitMode()) { 823 // FIXME: This should be done using Requires<Not64BitMode> and 824 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 825 // checked. 826 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 827 // REX prefix. 828 if (RegNo == X86::RIZ || 829 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 830 X86II::isX86_64NonExtLowByteReg(RegNo) || 831 X86II::isX86_64ExtendedReg(RegNo)) 832 return Error(StartLoc, "register %" 833 + Tok.getString() + " is only available in 64-bit mode", 834 SMRange(StartLoc, EndLoc)); 835 } 836 837 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 838 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 839 RegNo = X86::ST0; 840 Parser.Lex(); // Eat 'st' 841 842 // Check to see if we have '(4)' after %st. 843 if (getLexer().isNot(AsmToken::LParen)) 844 return false; 845 // Lex the paren. 846 getParser().Lex(); 847 848 const AsmToken &IntTok = Parser.getTok(); 849 if (IntTok.isNot(AsmToken::Integer)) 850 return Error(IntTok.getLoc(), "expected stack index"); 851 switch (IntTok.getIntVal()) { 852 case 0: RegNo = X86::ST0; break; 853 case 1: RegNo = X86::ST1; break; 854 case 2: RegNo = X86::ST2; break; 855 case 3: RegNo = X86::ST3; break; 856 case 4: RegNo = X86::ST4; break; 857 case 5: RegNo = X86::ST5; break; 858 case 6: RegNo = X86::ST6; break; 859 case 7: RegNo = X86::ST7; break; 860 default: return Error(IntTok.getLoc(), "invalid stack index"); 861 } 862 863 if (getParser().Lex().isNot(AsmToken::RParen)) 864 return Error(Parser.getTok().getLoc(), "expected ')'"); 865 866 EndLoc = Parser.getTok().getEndLoc(); 867 Parser.Lex(); // Eat ')' 868 return false; 869 } 870 871 EndLoc = Parser.getTok().getEndLoc(); 872 873 // If this is "db[0-7]", match it as an alias 874 // for dr[0-7]. 875 if (RegNo == 0 && Tok.getString().size() == 3 && 876 Tok.getString().startswith("db")) { 877 switch (Tok.getString()[2]) { 878 case '0': RegNo = X86::DR0; break; 879 case '1': RegNo = X86::DR1; break; 880 case '2': RegNo = X86::DR2; break; 881 case '3': RegNo = X86::DR3; break; 882 case '4': RegNo = X86::DR4; break; 883 case '5': RegNo = X86::DR5; break; 884 case '6': RegNo = X86::DR6; break; 885 case '7': RegNo = X86::DR7; break; 886 } 887 888 if (RegNo != 0) { 889 EndLoc = Parser.getTok().getEndLoc(); 890 Parser.Lex(); // Eat it. 891 return false; 892 } 893 } 894 895 if (RegNo == 0) { 896 if (isParsingIntelSyntax()) return true; 897 return Error(StartLoc, "invalid register name", 898 SMRange(StartLoc, EndLoc)); 899 } 900 901 Parser.Lex(); // Eat identifier token. 902 return false; 903} 904 905X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { 906 unsigned basereg = 907 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); 908 const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); 909 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg, 910 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); 911} 912 913X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { 914 unsigned basereg = 915 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI); 916 const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); 917 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg, 918 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); 919} 920 921X86Operand *X86AsmParser::ParseOperand() { 922 if (isParsingIntelSyntax()) 923 return ParseIntelOperand(); 924 return ParseATTOperand(); 925} 926 927/// getIntelMemOperandSize - Return intel memory operand size. 928static unsigned getIntelMemOperandSize(StringRef OpStr) { 929 unsigned Size = StringSwitch<unsigned>(OpStr) 930 .Cases("BYTE", "byte", 8) 931 .Cases("WORD", "word", 16) 932 .Cases("DWORD", "dword", 32) 933 .Cases("QWORD", "qword", 64) 934 .Cases("XWORD", "xword", 80) 935 .Cases("XMMWORD", "xmmword", 128) 936 .Cases("YMMWORD", "ymmword", 256) 937 .Cases("ZMMWORD", "zmmword", 512) 938 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter 939 .Default(0); 940 return Size; 941} 942 943X86Operand * 944X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, 945 unsigned BaseReg, unsigned IndexReg, 946 unsigned Scale, SMLoc Start, SMLoc End, 947 unsigned Size, StringRef Identifier, 948 InlineAsmIdentifierInfo &Info){ 949 // If this is not a VarDecl then assume it is a FuncDecl or some other label 950 // reference. We need an 'r' constraint here, so we need to create register 951 // operand to ensure proper matching. Just pick a GPR based on the size of 952 // a pointer. 953 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) { 954 unsigned RegNo = 955 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX); 956 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, 957 SMLoc(), Identifier, Info.OpDecl); 958 } 959 960 // We either have a direct symbol reference, or an offset from a symbol. The 961 // parser always puts the symbol on the LHS, so look there for size 962 // calculation purposes. 963 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); 964 bool IsSymRef = 965 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); 966 if (IsSymRef) { 967 if (!Size) { 968 Size = Info.Type * 8; // Size is in terms of bits in this context. 969 if (Size) 970 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, 971 /*Len=*/0, Size)); 972 } 973 } 974 975 // When parsing inline assembly we set the base register to a non-zero value 976 // if we don't know the actual value at this time. This is necessary to 977 // get the matching correct in some cases. 978 BaseReg = BaseReg ? BaseReg : 1; 979 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 980 End, Size, Identifier, Info.OpDecl); 981} 982 983static void 984RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, 985 StringRef SymName, int64_t ImmDisp, 986 int64_t FinalImmDisp, SMLoc &BracLoc, 987 SMLoc &StartInBrac, SMLoc &End) { 988 // Remove the '[' and ']' from the IR string. 989 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); 990 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); 991 992 // If ImmDisp is non-zero, then we parsed a displacement before the 993 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) 994 // If ImmDisp doesn't match the displacement computed by the state machine 995 // then we have an additional displacement in the bracketed expression. 996 if (ImmDisp != FinalImmDisp) { 997 if (ImmDisp) { 998 // We have an immediate displacement before the bracketed expression. 999 // Adjust this to match the final immediate displacement. 1000 bool Found = false; 1001 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), 1002 E = AsmRewrites->end(); I != E; ++I) { 1003 if ((*I).Loc.getPointer() > BracLoc.getPointer()) 1004 continue; 1005 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { 1006 assert (!Found && "ImmDisp already rewritten."); 1007 (*I).Kind = AOK_Imm; 1008 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); 1009 (*I).Val = FinalImmDisp; 1010 Found = true; 1011 break; 1012 } 1013 } 1014 assert (Found && "Unable to rewrite ImmDisp."); 1015 (void)Found; 1016 } else { 1017 // We have a symbolic and an immediate displacement, but no displacement 1018 // before the bracketed expression. Put the immediate displacement 1019 // before the bracketed expression. 1020 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); 1021 } 1022 } 1023 // Remove all the ImmPrefix rewrites within the brackets. 1024 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), 1025 E = AsmRewrites->end(); I != E; ++I) { 1026 if ((*I).Loc.getPointer() < StartInBrac.getPointer()) 1027 continue; 1028 if ((*I).Kind == AOK_ImmPrefix) 1029 (*I).Kind = AOK_Delete; 1030 } 1031 const char *SymLocPtr = SymName.data(); 1032 // Skip everything before the symbol. 1033 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { 1034 assert(Len > 0 && "Expected a non-negative length."); 1035 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); 1036 } 1037 // Skip everything after the symbol. 1038 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { 1039 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); 1040 assert(Len > 0 && "Expected a non-negative length."); 1041 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); 1042 } 1043} 1044 1045bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1046 const AsmToken &Tok = Parser.getTok(); 1047 1048 bool Done = false; 1049 while (!Done) { 1050 bool UpdateLocLex = true; 1051 1052 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an 1053 // identifier. Don't try an parse it as a register. 1054 if (Tok.getString().startswith(".")) 1055 break; 1056 1057 // If we're parsing an immediate expression, we don't expect a '['. 1058 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) 1059 break; 1060 1061 switch (getLexer().getKind()) { 1062 default: { 1063 if (SM.isValidEndState()) { 1064 Done = true; 1065 break; 1066 } 1067 return Error(Tok.getLoc(), "unknown token in expression"); 1068 } 1069 case AsmToken::EndOfStatement: { 1070 Done = true; 1071 break; 1072 } 1073 case AsmToken::Identifier: { 1074 // This could be a register or a symbolic displacement. 1075 unsigned TmpReg; 1076 const MCExpr *Val; 1077 SMLoc IdentLoc = Tok.getLoc(); 1078 StringRef Identifier = Tok.getString(); 1079 if(!ParseRegister(TmpReg, IdentLoc, End)) { 1080 SM.onRegister(TmpReg); 1081 UpdateLocLex = false; 1082 break; 1083 } else { 1084 if (!isParsingInlineAsm()) { 1085 if (getParser().parsePrimaryExpr(Val, End)) 1086 return Error(Tok.getLoc(), "Unexpected identifier!"); 1087 } else { 1088 // This is a dot operator, not an adjacent identifier. 1089 if (Identifier.find('.') != StringRef::npos) { 1090 return false; 1091 } else { 1092 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1093 if (ParseIntelIdentifier(Val, Identifier, Info, 1094 /*Unevaluated=*/false, End)) 1095 return true; 1096 } 1097 } 1098 SM.onIdentifierExpr(Val, Identifier); 1099 UpdateLocLex = false; 1100 break; 1101 } 1102 return Error(Tok.getLoc(), "Unexpected identifier!"); 1103 } 1104 case AsmToken::Integer: { 1105 StringRef ErrMsg; 1106 if (isParsingInlineAsm() && SM.getAddImmPrefix()) 1107 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, 1108 Tok.getLoc())); 1109 // Look for 'b' or 'f' following an Integer as a directional label 1110 SMLoc Loc = getTok().getLoc(); 1111 int64_t IntVal = getTok().getIntVal(); 1112 End = consumeToken(); 1113 UpdateLocLex = false; 1114 if (getLexer().getKind() == AsmToken::Identifier) { 1115 StringRef IDVal = getTok().getString(); 1116 if (IDVal == "f" || IDVal == "b") { 1117 MCSymbol *Sym = 1118 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b"); 1119 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1120 const MCExpr *Val = 1121 MCSymbolRefExpr::Create(Sym, Variant, getContext()); 1122 if (IDVal == "b" && Sym->isUndefined()) 1123 return Error(Loc, "invalid reference to undefined symbol"); 1124 StringRef Identifier = Sym->getName(); 1125 SM.onIdentifierExpr(Val, Identifier); 1126 End = consumeToken(); 1127 } else { 1128 if (SM.onInteger(IntVal, ErrMsg)) 1129 return Error(Loc, ErrMsg); 1130 } 1131 } else { 1132 if (SM.onInteger(IntVal, ErrMsg)) 1133 return Error(Loc, ErrMsg); 1134 } 1135 break; 1136 } 1137 case AsmToken::Plus: SM.onPlus(); break; 1138 case AsmToken::Minus: SM.onMinus(); break; 1139 case AsmToken::Star: SM.onStar(); break; 1140 case AsmToken::Slash: SM.onDivide(); break; 1141 case AsmToken::Pipe: SM.onOr(); break; 1142 case AsmToken::Amp: SM.onAnd(); break; 1143 case AsmToken::LessLess: 1144 SM.onLShift(); break; 1145 case AsmToken::GreaterGreater: 1146 SM.onRShift(); break; 1147 case AsmToken::LBrac: SM.onLBrac(); break; 1148 case AsmToken::RBrac: SM.onRBrac(); break; 1149 case AsmToken::LParen: SM.onLParen(); break; 1150 case AsmToken::RParen: SM.onRParen(); break; 1151 } 1152 if (SM.hadError()) 1153 return Error(Tok.getLoc(), "unknown token in expression"); 1154 1155 if (!Done && UpdateLocLex) 1156 End = consumeToken(); 1157 } 1158 return false; 1159} 1160 1161X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 1162 int64_t ImmDisp, 1163 unsigned Size) { 1164 const AsmToken &Tok = Parser.getTok(); 1165 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); 1166 if (getLexer().isNot(AsmToken::LBrac)) 1167 return ErrorOperand(BracLoc, "Expected '[' token!"); 1168 Parser.Lex(); // Eat '[' 1169 1170 SMLoc StartInBrac = Tok.getLoc(); 1171 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We 1172 // may have already parsed an immediate displacement before the bracketed 1173 // expression. 1174 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); 1175 if (ParseIntelExpression(SM, End)) 1176 return 0; 1177 1178 const MCExpr *Disp = 0; 1179 if (const MCExpr *Sym = SM.getSym()) { 1180 // A symbolic displacement. 1181 Disp = Sym; 1182 if (isParsingInlineAsm()) 1183 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), 1184 ImmDisp, SM.getImm(), BracLoc, StartInBrac, 1185 End); 1186 } 1187 1188 if (SM.getImm() || !Disp) { 1189 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext()); 1190 if (Disp) 1191 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext()); 1192 else 1193 Disp = Imm; // An immediate displacement only. 1194 } 1195 1196 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC 1197 // will in fact do global lookup the field name inside all global typedefs, 1198 // but we don't emulate that. 1199 if (Tok.getString().find('.') != StringRef::npos) { 1200 const MCExpr *NewDisp; 1201 if (ParseIntelDotOperator(Disp, NewDisp)) 1202 return 0; 1203 1204 End = Tok.getEndLoc(); 1205 Parser.Lex(); // Eat the field. 1206 Disp = NewDisp; 1207 } 1208 1209 int BaseReg = SM.getBaseReg(); 1210 int IndexReg = SM.getIndexReg(); 1211 int Scale = SM.getScale(); 1212 if (!isParsingInlineAsm()) { 1213 // handle [-42] 1214 if (!BaseReg && !IndexReg) { 1215 if (!SegReg) 1216 return X86Operand::CreateMem(Disp, Start, End, Size); 1217 else 1218 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); 1219 } 1220 StringRef ErrMsg; 1221 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 1222 Error(StartInBrac, ErrMsg); 1223 return 0; 1224 } 1225 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1226 End, Size); 1227 } 1228 1229 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1230 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1231 End, Size, SM.getSymName(), Info); 1232} 1233 1234// Inline assembly may use variable names with namespace alias qualifiers. 1235bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, 1236 StringRef &Identifier, 1237 InlineAsmIdentifierInfo &Info, 1238 bool IsUnevaluatedOperand, SMLoc &End) { 1239 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); 1240 Val = 0; 1241 1242 StringRef LineBuf(Identifier.data()); 1243 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 1244 1245 const AsmToken &Tok = Parser.getTok(); 1246 1247 // Advance the token stream until the end of the current token is 1248 // after the end of what the frontend claimed. 1249 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 1250 while (true) { 1251 End = Tok.getEndLoc(); 1252 getLexer().Lex(); 1253 1254 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); 1255 if (End.getPointer() == EndPtr) break; 1256 } 1257 1258 // Create the symbol reference. 1259 Identifier = LineBuf; 1260 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier); 1261 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1262 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); 1263 return false; 1264} 1265 1266/// \brief Parse intel style segment override. 1267X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, 1268 SMLoc Start, 1269 unsigned Size) { 1270 assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); 1271 const AsmToken &Tok = Parser.getTok(); // Eat colon. 1272 if (Tok.isNot(AsmToken::Colon)) 1273 return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); 1274 Parser.Lex(); // Eat ':' 1275 1276 int64_t ImmDisp = 0; 1277 if (getLexer().is(AsmToken::Integer)) { 1278 ImmDisp = Tok.getIntVal(); 1279 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. 1280 1281 if (isParsingInlineAsm()) 1282 InstInfo->AsmRewrites->push_back( 1283 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc())); 1284 1285 if (getLexer().isNot(AsmToken::LBrac)) { 1286 // An immediate following a 'segment register', 'colon' token sequence can 1287 // be followed by a bracketed expression. If it isn't we know we have our 1288 // final segment override. 1289 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext()); 1290 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0, 1291 /*Scale=*/1, Start, ImmDispToken.getEndLoc(), 1292 Size); 1293 } 1294 } 1295 1296 if (getLexer().is(AsmToken::LBrac)) 1297 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); 1298 1299 const MCExpr *Val; 1300 SMLoc End; 1301 if (!isParsingInlineAsm()) { 1302 if (getParser().parsePrimaryExpr(Val, End)) 1303 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1304 1305 return X86Operand::CreateMem(Val, Start, End, Size); 1306 } 1307 1308 InlineAsmIdentifierInfo Info; 1309 StringRef Identifier = Tok.getString(); 1310 if (ParseIntelIdentifier(Val, Identifier, Info, 1311 /*Unevaluated=*/false, End)) 1312 return 0; 1313 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, 1314 /*Scale=*/1, Start, End, Size, Identifier, Info); 1315} 1316 1317/// ParseIntelMemOperand - Parse intel style memory operand. 1318X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start, 1319 unsigned Size) { 1320 const AsmToken &Tok = Parser.getTok(); 1321 SMLoc End; 1322 1323 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1324 if (getLexer().is(AsmToken::LBrac)) 1325 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size); 1326 assert(ImmDisp == 0); 1327 1328 const MCExpr *Val; 1329 if (!isParsingInlineAsm()) { 1330 if (getParser().parsePrimaryExpr(Val, End)) 1331 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1332 1333 return X86Operand::CreateMem(Val, Start, End, Size); 1334 } 1335 1336 InlineAsmIdentifierInfo Info; 1337 StringRef Identifier = Tok.getString(); 1338 if (ParseIntelIdentifier(Val, Identifier, Info, 1339 /*Unevaluated=*/false, End)) 1340 return 0; 1341 1342 if (!getLexer().is(AsmToken::LBrac)) 1343 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0, 1344 /*Scale=*/1, Start, End, Size, Identifier, Info); 1345 1346 Parser.Lex(); // Eat '[' 1347 1348 // Parse Identifier [ ImmDisp ] 1349 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true, 1350 /*AddImmPrefix=*/false); 1351 if (ParseIntelExpression(SM, End)) 1352 return 0; 1353 1354 if (SM.getSym()) { 1355 Error(Start, "cannot use more than one symbol in memory operand"); 1356 return 0; 1357 } 1358 if (SM.getBaseReg()) { 1359 Error(Start, "cannot use base register with variable reference"); 1360 return 0; 1361 } 1362 if (SM.getIndexReg()) { 1363 Error(Start, "cannot use index register with variable reference"); 1364 return 0; 1365 } 1366 1367 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext()); 1368 // BaseReg is non-zero to avoid assertions. In the context of inline asm, 1369 // we're pointing to a local variable in memory, so the base register is 1370 // really the frame or stack pointer. 1371 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0, 1372 /*Scale=*/1, Start, End, Size, Identifier, 1373 Info.OpDecl); 1374} 1375 1376/// Parse the '.' operator. 1377bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, 1378 const MCExpr *&NewDisp) { 1379 const AsmToken &Tok = Parser.getTok(); 1380 int64_t OrigDispVal, DotDispVal; 1381 1382 // FIXME: Handle non-constant expressions. 1383 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) 1384 OrigDispVal = OrigDisp->getValue(); 1385 else 1386 return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); 1387 1388 // Drop the optional '.'. 1389 StringRef DotDispStr = Tok.getString(); 1390 if (DotDispStr.startswith(".")) 1391 DotDispStr = DotDispStr.drop_front(1); 1392 1393 // .Imm gets lexed as a real. 1394 if (Tok.is(AsmToken::Real)) { 1395 APInt DotDisp; 1396 DotDispStr.getAsInteger(10, DotDisp); 1397 DotDispVal = DotDisp.getZExtValue(); 1398 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1399 unsigned DotDisp; 1400 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 1401 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, 1402 DotDisp)) 1403 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 1404 DotDispVal = DotDisp; 1405 } else 1406 return Error(Tok.getLoc(), "Unexpected token type!"); 1407 1408 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1409 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); 1410 unsigned Len = DotDispStr.size(); 1411 unsigned Val = OrigDispVal + DotDispVal; 1412 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, 1413 Val)); 1414 } 1415 1416 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); 1417 return false; 1418} 1419 1420/// Parse the 'offset' operator. This operator is used to specify the 1421/// location rather then the content of a variable. 1422X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { 1423 const AsmToken &Tok = Parser.getTok(); 1424 SMLoc OffsetOfLoc = Tok.getLoc(); 1425 Parser.Lex(); // Eat offset. 1426 1427 const MCExpr *Val; 1428 InlineAsmIdentifierInfo Info; 1429 SMLoc Start = Tok.getLoc(), End; 1430 StringRef Identifier = Tok.getString(); 1431 if (ParseIntelIdentifier(Val, Identifier, Info, 1432 /*Unevaluated=*/false, End)) 1433 return 0; 1434 1435 // Don't emit the offset operator. 1436 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); 1437 1438 // The offset operator will have an 'r' constraint, thus we need to create 1439 // register operand to ensure proper matching. Just pick a GPR based on 1440 // the size of a pointer. 1441 unsigned RegNo = 1442 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX); 1443 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, 1444 OffsetOfLoc, Identifier, Info.OpDecl); 1445} 1446 1447enum IntelOperatorKind { 1448 IOK_LENGTH, 1449 IOK_SIZE, 1450 IOK_TYPE 1451}; 1452 1453/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 1454/// returns the number of elements in an array. It returns the value 1 for 1455/// non-array variables. The SIZE operator returns the size of a C or C++ 1456/// variable. A variable's size is the product of its LENGTH and TYPE. The 1457/// TYPE operator returns the size of a C or C++ type or variable. If the 1458/// variable is an array, TYPE returns the size of a single element. 1459X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { 1460 const AsmToken &Tok = Parser.getTok(); 1461 SMLoc TypeLoc = Tok.getLoc(); 1462 Parser.Lex(); // Eat operator. 1463 1464 const MCExpr *Val = 0; 1465 InlineAsmIdentifierInfo Info; 1466 SMLoc Start = Tok.getLoc(), End; 1467 StringRef Identifier = Tok.getString(); 1468 if (ParseIntelIdentifier(Val, Identifier, Info, 1469 /*Unevaluated=*/true, End)) 1470 return 0; 1471 1472 if (!Info.OpDecl) 1473 return ErrorOperand(Start, "unable to lookup expression"); 1474 1475 unsigned CVal = 0; 1476 switch(OpKind) { 1477 default: llvm_unreachable("Unexpected operand kind!"); 1478 case IOK_LENGTH: CVal = Info.Length; break; 1479 case IOK_SIZE: CVal = Info.Size; break; 1480 case IOK_TYPE: CVal = Info.Type; break; 1481 } 1482 1483 // Rewrite the type operator and the C or C++ type or variable in terms of an 1484 // immediate. E.g. TYPE foo -> $$4 1485 unsigned Len = End.getPointer() - TypeLoc.getPointer(); 1486 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); 1487 1488 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); 1489 return X86Operand::CreateImm(Imm, Start, End); 1490} 1491 1492X86Operand *X86AsmParser::ParseIntelOperand() { 1493 const AsmToken &Tok = Parser.getTok(); 1494 SMLoc Start, End; 1495 1496 // Offset, length, type and size operators. 1497 if (isParsingInlineAsm()) { 1498 StringRef AsmTokStr = Tok.getString(); 1499 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") 1500 return ParseIntelOffsetOfOperator(); 1501 if (AsmTokStr == "length" || AsmTokStr == "LENGTH") 1502 return ParseIntelOperator(IOK_LENGTH); 1503 if (AsmTokStr == "size" || AsmTokStr == "SIZE") 1504 return ParseIntelOperator(IOK_SIZE); 1505 if (AsmTokStr == "type" || AsmTokStr == "TYPE") 1506 return ParseIntelOperator(IOK_TYPE); 1507 } 1508 1509 unsigned Size = getIntelMemOperandSize(Tok.getString()); 1510 if (Size) { 1511 Parser.Lex(); // Eat operand size (e.g., byte, word). 1512 if (Tok.getString() != "PTR" && Tok.getString() != "ptr") 1513 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); 1514 Parser.Lex(); // Eat ptr. 1515 } 1516 Start = Tok.getLoc(); 1517 1518 // Immediate. 1519 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || 1520 getLexer().is(AsmToken::LParen)) { 1521 AsmToken StartTok = Tok; 1522 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, 1523 /*AddImmPrefix=*/false); 1524 if (ParseIntelExpression(SM, End)) 1525 return 0; 1526 1527 int64_t Imm = SM.getImm(); 1528 if (isParsingInlineAsm()) { 1529 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); 1530 if (StartTok.getString().size() == Len) 1531 // Just add a prefix if this wasn't a complex immediate expression. 1532 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); 1533 else 1534 // Otherwise, rewrite the complex expression as a single immediate. 1535 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); 1536 } 1537 1538 if (getLexer().isNot(AsmToken::LBrac)) { 1539 // If a directional label (ie. 1f or 2b) was parsed above from 1540 // ParseIntelExpression() then SM.getSym() was set to a pointer to 1541 // to the MCExpr with the directional local symbol and this is a 1542 // memory operand not an immediate operand. 1543 if (SM.getSym()) 1544 return X86Operand::CreateMem(SM.getSym(), Start, End, Size); 1545 1546 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); 1547 return X86Operand::CreateImm(ImmExpr, Start, End); 1548 } 1549 1550 // Only positive immediates are valid. 1551 if (Imm < 0) 1552 return ErrorOperand(Start, "expected a positive immediate displacement " 1553 "before bracketed expr."); 1554 1555 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1556 return ParseIntelMemOperand(Imm, Start, Size); 1557 } 1558 1559 // Register. 1560 unsigned RegNo = 0; 1561 if (!ParseRegister(RegNo, Start, End)) { 1562 // If this is a segment register followed by a ':', then this is the start 1563 // of a segment override, otherwise this is a normal register reference. 1564 if (getLexer().isNot(AsmToken::Colon)) 1565 return X86Operand::CreateReg(RegNo, Start, End); 1566 1567 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); 1568 } 1569 1570 // Memory operand. 1571 return ParseIntelMemOperand(/*Disp=*/0, Start, Size); 1572} 1573 1574X86Operand *X86AsmParser::ParseATTOperand() { 1575 switch (getLexer().getKind()) { 1576 default: 1577 // Parse a memory operand with no segment register. 1578 return ParseMemOperand(0, Parser.getTok().getLoc()); 1579 case AsmToken::Percent: { 1580 // Read the register. 1581 unsigned RegNo; 1582 SMLoc Start, End; 1583 if (ParseRegister(RegNo, Start, End)) return 0; 1584 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 1585 Error(Start, "%eiz and %riz can only be used as index registers", 1586 SMRange(Start, End)); 1587 return 0; 1588 } 1589 1590 // If this is a segment register followed by a ':', then this is the start 1591 // of a memory reference, otherwise this is a normal register reference. 1592 if (getLexer().isNot(AsmToken::Colon)) 1593 return X86Operand::CreateReg(RegNo, Start, End); 1594 1595 getParser().Lex(); // Eat the colon. 1596 return ParseMemOperand(RegNo, Start); 1597 } 1598 case AsmToken::Dollar: { 1599 // $42 -> immediate. 1600 SMLoc Start = Parser.getTok().getLoc(), End; 1601 Parser.Lex(); 1602 const MCExpr *Val; 1603 if (getParser().parseExpression(Val, End)) 1604 return 0; 1605 return X86Operand::CreateImm(Val, Start, End); 1606 } 1607 } 1608} 1609 1610bool 1611X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, 1612 const MCParsedAsmOperand &Op) { 1613 if(STI.getFeatureBits() & X86::FeatureAVX512) { 1614 if (getLexer().is(AsmToken::LCurly)) { 1615 // Eat "{" and mark the current place. 1616 const SMLoc consumedToken = consumeToken(); 1617 // Distinguish {1to<NUM>} from {%k<NUM>}. 1618 if(getLexer().is(AsmToken::Integer)) { 1619 // Parse memory broadcasting ({1to<NUM>}). 1620 if (getLexer().getTok().getIntVal() != 1) 1621 return !ErrorAndEatStatement(getLexer().getLoc(), 1622 "Expected 1to<NUM> at this point"); 1623 Parser.Lex(); // Eat "1" of 1to8 1624 if (!getLexer().is(AsmToken::Identifier) || 1625 !getLexer().getTok().getIdentifier().startswith("to")) 1626 return !ErrorAndEatStatement(getLexer().getLoc(), 1627 "Expected 1to<NUM> at this point"); 1628 // Recognize only reasonable suffixes. 1629 const char *BroadcastPrimitive = 1630 StringSwitch<const char*>(getLexer().getTok().getIdentifier()) 1631 .Case("to8", "{1to8}") 1632 .Case("to16", "{1to16}") 1633 .Default(0); 1634 if (!BroadcastPrimitive) 1635 return !ErrorAndEatStatement(getLexer().getLoc(), 1636 "Invalid memory broadcast primitive."); 1637 Parser.Lex(); // Eat "toN" of 1toN 1638 if (!getLexer().is(AsmToken::RCurly)) 1639 return !ErrorAndEatStatement(getLexer().getLoc(), 1640 "Expected } at this point"); 1641 Parser.Lex(); // Eat "}" 1642 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, 1643 consumedToken)); 1644 // No AVX512 specific primitives can pass 1645 // after memory broadcasting, so return. 1646 return true; 1647 } else { 1648 // Parse mask register {%k1} 1649 Operands.push_back(X86Operand::CreateToken("{", consumedToken)); 1650 if (X86Operand *Op = ParseOperand()) { 1651 Operands.push_back(Op); 1652 if (!getLexer().is(AsmToken::RCurly)) 1653 return !ErrorAndEatStatement(getLexer().getLoc(), 1654 "Expected } at this point"); 1655 Operands.push_back(X86Operand::CreateToken("}", consumeToken())); 1656 1657 // Parse "zeroing non-masked" semantic {z} 1658 if (getLexer().is(AsmToken::LCurly)) { 1659 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken())); 1660 if (!getLexer().is(AsmToken::Identifier) || 1661 getLexer().getTok().getIdentifier() != "z") 1662 return !ErrorAndEatStatement(getLexer().getLoc(), 1663 "Expected z at this point"); 1664 Parser.Lex(); // Eat the z 1665 if (!getLexer().is(AsmToken::RCurly)) 1666 return !ErrorAndEatStatement(getLexer().getLoc(), 1667 "Expected } at this point"); 1668 Parser.Lex(); // Eat the } 1669 } 1670 } 1671 } 1672 } 1673 } 1674 return true; 1675} 1676 1677/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 1678/// has already been parsed if present. 1679X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 1680 1681 // We have to disambiguate a parenthesized expression "(4+5)" from the start 1682 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 1683 // only way to do this without lookahead is to eat the '(' and see what is 1684 // after it. 1685 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 1686 if (getLexer().isNot(AsmToken::LParen)) { 1687 SMLoc ExprEnd; 1688 if (getParser().parseExpression(Disp, ExprEnd)) return 0; 1689 1690 // After parsing the base expression we could either have a parenthesized 1691 // memory address or not. If not, return now. If so, eat the (. 1692 if (getLexer().isNot(AsmToken::LParen)) { 1693 // Unless we have a segment register, treat this as an immediate. 1694 if (SegReg == 0) 1695 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 1696 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1697 } 1698 1699 // Eat the '('. 1700 Parser.Lex(); 1701 } else { 1702 // Okay, we have a '('. We don't know if this is an expression or not, but 1703 // so we have to eat the ( to see beyond it. 1704 SMLoc LParenLoc = Parser.getTok().getLoc(); 1705 Parser.Lex(); // Eat the '('. 1706 1707 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 1708 // Nothing to do here, fall into the code below with the '(' part of the 1709 // memory operand consumed. 1710 } else { 1711 SMLoc ExprEnd; 1712 1713 // It must be an parenthesized expression, parse it now. 1714 if (getParser().parseParenExpression(Disp, ExprEnd)) 1715 return 0; 1716 1717 // After parsing the base expression we could either have a parenthesized 1718 // memory address or not. If not, return now. If so, eat the (. 1719 if (getLexer().isNot(AsmToken::LParen)) { 1720 // Unless we have a segment register, treat this as an immediate. 1721 if (SegReg == 0) 1722 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 1723 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1724 } 1725 1726 // Eat the '('. 1727 Parser.Lex(); 1728 } 1729 } 1730 1731 // If we reached here, then we just ate the ( of the memory operand. Process 1732 // the rest of the memory operand. 1733 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 1734 SMLoc IndexLoc, BaseLoc; 1735 1736 if (getLexer().is(AsmToken::Percent)) { 1737 SMLoc StartLoc, EndLoc; 1738 BaseLoc = Parser.getTok().getLoc(); 1739 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0; 1740 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 1741 Error(StartLoc, "eiz and riz can only be used as index registers", 1742 SMRange(StartLoc, EndLoc)); 1743 return 0; 1744 } 1745 } 1746 1747 if (getLexer().is(AsmToken::Comma)) { 1748 Parser.Lex(); // Eat the comma. 1749 IndexLoc = Parser.getTok().getLoc(); 1750 1751 // Following the comma we should have either an index register, or a scale 1752 // value. We don't support the later form, but we want to parse it 1753 // correctly. 1754 // 1755 // Not that even though it would be completely consistent to support syntax 1756 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 1757 if (getLexer().is(AsmToken::Percent)) { 1758 SMLoc L; 1759 if (ParseRegister(IndexReg, L, L)) return 0; 1760 1761 if (getLexer().isNot(AsmToken::RParen)) { 1762 // Parse the scale amount: 1763 // ::= ',' [scale-expression] 1764 if (getLexer().isNot(AsmToken::Comma)) { 1765 Error(Parser.getTok().getLoc(), 1766 "expected comma in scale expression"); 1767 return 0; 1768 } 1769 Parser.Lex(); // Eat the comma. 1770 1771 if (getLexer().isNot(AsmToken::RParen)) { 1772 SMLoc Loc = Parser.getTok().getLoc(); 1773 1774 int64_t ScaleVal; 1775 if (getParser().parseAbsoluteExpression(ScaleVal)){ 1776 Error(Loc, "expected scale expression"); 1777 return 0; 1778 } 1779 1780 // Validate the scale amount. 1781 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1782 ScaleVal != 1) { 1783 Error(Loc, "scale factor in 16-bit address must be 1"); 1784 return 0; 1785 } 1786 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 1787 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 1788 return 0; 1789 } 1790 Scale = (unsigned)ScaleVal; 1791 } 1792 } 1793 } else if (getLexer().isNot(AsmToken::RParen)) { 1794 // A scale amount without an index is ignored. 1795 // index. 1796 SMLoc Loc = Parser.getTok().getLoc(); 1797 1798 int64_t Value; 1799 if (getParser().parseAbsoluteExpression(Value)) 1800 return 0; 1801 1802 if (Value != 1) 1803 Warning(Loc, "scale factor without index register is ignored"); 1804 Scale = 1; 1805 } 1806 } 1807 1808 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 1809 if (getLexer().isNot(AsmToken::RParen)) { 1810 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 1811 return 0; 1812 } 1813 SMLoc MemEnd = Parser.getTok().getEndLoc(); 1814 Parser.Lex(); // Eat the ')'. 1815 1816 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 1817 // and then only in non-64-bit modes. Except for DX, which is a special case 1818 // because an unofficial form of in/out instructions uses it. 1819 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1820 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP && 1821 BaseReg != X86::SI && BaseReg != X86::DI)) && 1822 BaseReg != X86::DX) { 1823 Error(BaseLoc, "invalid 16-bit base register"); 1824 return 0; 1825 } 1826 if (BaseReg == 0 && 1827 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 1828 Error(IndexLoc, "16-bit memory operand may not include only index register"); 1829 return 0; 1830 } 1831 1832 StringRef ErrMsg; 1833 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 1834 Error(BaseLoc, ErrMsg); 1835 return 0; 1836 } 1837 1838 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 1839 MemStart, MemEnd); 1840} 1841 1842bool X86AsmParser:: 1843ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, 1844 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 1845 InstInfo = &Info; 1846 StringRef PatchedName = Name; 1847 1848 // FIXME: Hack to recognize setneb as setne. 1849 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 1850 PatchedName != "setb" && PatchedName != "setnb") 1851 PatchedName = PatchedName.substr(0, Name.size()-1); 1852 1853 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 1854 const MCExpr *ExtraImmOp = 0; 1855 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 1856 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 1857 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 1858 bool IsVCMP = PatchedName[0] == 'v'; 1859 unsigned SSECCIdx = IsVCMP ? 4 : 3; 1860 unsigned SSEComparisonCode = StringSwitch<unsigned>( 1861 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 1862 .Case("eq", 0x00) 1863 .Case("lt", 0x01) 1864 .Case("le", 0x02) 1865 .Case("unord", 0x03) 1866 .Case("neq", 0x04) 1867 .Case("nlt", 0x05) 1868 .Case("nle", 0x06) 1869 .Case("ord", 0x07) 1870 /* AVX only from here */ 1871 .Case("eq_uq", 0x08) 1872 .Case("nge", 0x09) 1873 .Case("ngt", 0x0A) 1874 .Case("false", 0x0B) 1875 .Case("neq_oq", 0x0C) 1876 .Case("ge", 0x0D) 1877 .Case("gt", 0x0E) 1878 .Case("true", 0x0F) 1879 .Case("eq_os", 0x10) 1880 .Case("lt_oq", 0x11) 1881 .Case("le_oq", 0x12) 1882 .Case("unord_s", 0x13) 1883 .Case("neq_us", 0x14) 1884 .Case("nlt_uq", 0x15) 1885 .Case("nle_uq", 0x16) 1886 .Case("ord_s", 0x17) 1887 .Case("eq_us", 0x18) 1888 .Case("nge_uq", 0x19) 1889 .Case("ngt_uq", 0x1A) 1890 .Case("false_os", 0x1B) 1891 .Case("neq_os", 0x1C) 1892 .Case("ge_oq", 0x1D) 1893 .Case("gt_oq", 0x1E) 1894 .Case("true_us", 0x1F) 1895 .Default(~0U); 1896 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { 1897 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 1898 getParser().getContext()); 1899 if (PatchedName.endswith("ss")) { 1900 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 1901 } else if (PatchedName.endswith("sd")) { 1902 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 1903 } else if (PatchedName.endswith("ps")) { 1904 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 1905 } else { 1906 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 1907 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 1908 } 1909 } 1910 } 1911 1912 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 1913 1914 if (ExtraImmOp && !isParsingIntelSyntax()) 1915 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1916 1917 // Determine whether this is an instruction prefix. 1918 bool isPrefix = 1919 Name == "lock" || Name == "rep" || 1920 Name == "repe" || Name == "repz" || 1921 Name == "repne" || Name == "repnz" || 1922 Name == "rex64" || Name == "data16"; 1923 1924 1925 // This does the actual operand parsing. Don't parse any more if we have a 1926 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 1927 // just want to parse the "lock" as the first instruction and the "incl" as 1928 // the next one. 1929 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 1930 1931 // Parse '*' modifier. 1932 if (getLexer().is(AsmToken::Star)) 1933 Operands.push_back(X86Operand::CreateToken("*", consumeToken())); 1934 1935 // Read the operands. 1936 while(1) { 1937 if (X86Operand *Op = ParseOperand()) { 1938 Operands.push_back(Op); 1939 if (!HandleAVX512Operand(Operands, *Op)) 1940 return true; 1941 } else { 1942 Parser.eatToEndOfStatement(); 1943 return true; 1944 } 1945 // check for comma and eat it 1946 if (getLexer().is(AsmToken::Comma)) 1947 Parser.Lex(); 1948 else 1949 break; 1950 } 1951 1952 if (getLexer().isNot(AsmToken::EndOfStatement)) 1953 return ErrorAndEatStatement(getLexer().getLoc(), 1954 "unexpected token in argument list"); 1955 } 1956 1957 // Consume the EndOfStatement or the prefix separator Slash 1958 if (getLexer().is(AsmToken::EndOfStatement) || 1959 (isPrefix && getLexer().is(AsmToken::Slash))) 1960 Parser.Lex(); 1961 1962 if (ExtraImmOp && isParsingIntelSyntax()) 1963 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1964 1965 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> 1966 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 1967 // documented form in various unofficial manuals, so a lot of code uses it. 1968 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 1969 Operands.size() == 3) { 1970 X86Operand &Op = *(X86Operand*)Operands.back(); 1971 if (Op.isMem() && Op.Mem.SegReg == 0 && 1972 isa<MCConstantExpr>(Op.Mem.Disp) && 1973 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1974 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1975 SMLoc Loc = Op.getEndLoc(); 1976 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1977 delete &Op; 1978 } 1979 } 1980 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 1981 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && 1982 Operands.size() == 3) { 1983 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1984 if (Op.isMem() && Op.Mem.SegReg == 0 && 1985 isa<MCConstantExpr>(Op.Mem.Disp) && 1986 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1987 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1988 SMLoc Loc = Op.getEndLoc(); 1989 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1990 delete &Op; 1991 } 1992 } 1993 1994 // Append default arguments to "ins[bwld]" 1995 if (Name.startswith("ins") && Operands.size() == 1 && 1996 (Name == "insb" || Name == "insw" || Name == "insl" || 1997 Name == "insd" )) { 1998 if (isParsingIntelSyntax()) { 1999 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2000 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2001 } else { 2002 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2003 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2004 } 2005 } 2006 2007 // Append default arguments to "outs[bwld]" 2008 if (Name.startswith("outs") && Operands.size() == 1 && 2009 (Name == "outsb" || Name == "outsw" || Name == "outsl" || 2010 Name == "outsd" )) { 2011 if (isParsingIntelSyntax()) { 2012 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2013 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2014 } else { 2015 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2016 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2017 } 2018 } 2019 2020 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate 2021 // values of $SIREG according to the mode. It would be nice if this 2022 // could be achieved with InstAlias in the tables. 2023 if (Name.startswith("lods") && Operands.size() == 1 && 2024 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 2025 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) 2026 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2027 2028 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate 2029 // values of $DIREG according to the mode. It would be nice if this 2030 // could be achieved with InstAlias in the tables. 2031 if (Name.startswith("stos") && Operands.size() == 1 && 2032 (Name == "stos" || Name == "stosb" || Name == "stosw" || 2033 Name == "stosl" || Name == "stosd" || Name == "stosq")) 2034 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2035 2036 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate 2037 // values of $DIREG according to the mode. It would be nice if this 2038 // could be achieved with InstAlias in the tables. 2039 if (Name.startswith("scas") && Operands.size() == 1 && 2040 (Name == "scas" || Name == "scasb" || Name == "scasw" || 2041 Name == "scasl" || Name == "scasd" || Name == "scasq")) 2042 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2043 2044 // Add default SI and DI operands to "cmps[bwlq]". 2045 if (Name.startswith("cmps") && 2046 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || 2047 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { 2048 if (Operands.size() == 1) { 2049 if (isParsingIntelSyntax()) { 2050 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2051 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2052 } else { 2053 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2054 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2055 } 2056 } else if (Operands.size() == 3) { 2057 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 2058 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 2059 if (!doSrcDstMatch(Op, Op2)) 2060 return Error(Op.getStartLoc(), 2061 "mismatching source and destination index registers"); 2062 } 2063 } 2064 2065 // Add default SI and DI operands to "movs[bwlq]". 2066 if ((Name.startswith("movs") && 2067 (Name == "movs" || Name == "movsb" || Name == "movsw" || 2068 Name == "movsl" || Name == "movsd" || Name == "movsq")) || 2069 (Name.startswith("smov") && 2070 (Name == "smov" || Name == "smovb" || Name == "smovw" || 2071 Name == "smovl" || Name == "smovd" || Name == "smovq"))) { 2072 if (Operands.size() == 1) { 2073 if (Name == "movsd") 2074 Operands.back() = X86Operand::CreateToken("movsl", NameLoc); 2075 if (isParsingIntelSyntax()) { 2076 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2077 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2078 } else { 2079 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2080 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2081 } 2082 } else if (Operands.size() == 3) { 2083 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 2084 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 2085 if (!doSrcDstMatch(Op, Op2)) 2086 return Error(Op.getStartLoc(), 2087 "mismatching source and destination index registers"); 2088 } 2089 } 2090 2091 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 2092 // "shift <op>". 2093 if ((Name.startswith("shr") || Name.startswith("sar") || 2094 Name.startswith("shl") || Name.startswith("sal") || 2095 Name.startswith("rcl") || Name.startswith("rcr") || 2096 Name.startswith("rol") || Name.startswith("ror")) && 2097 Operands.size() == 3) { 2098 if (isParsingIntelSyntax()) { 2099 // Intel syntax 2100 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]); 2101 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 2102 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 2103 delete Operands[2]; 2104 Operands.pop_back(); 2105 } 2106 } else { 2107 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 2108 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 2109 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 2110 delete Operands[1]; 2111 Operands.erase(Operands.begin() + 1); 2112 } 2113 } 2114 } 2115 2116 // Transforms "int $3" into "int3" as a size optimization. We can't write an 2117 // instalias with an immediate operand yet. 2118 if (Name == "int" && Operands.size() == 2) { 2119 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 2120 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 2121 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) { 2122 delete Operands[1]; 2123 Operands.erase(Operands.begin() + 1); 2124 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3"); 2125 } 2126 } 2127 2128 return false; 2129} 2130 2131static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, 2132 bool isCmp) { 2133 MCInst TmpInst; 2134 TmpInst.setOpcode(Opcode); 2135 if (!isCmp) 2136 TmpInst.addOperand(MCOperand::CreateReg(Reg)); 2137 TmpInst.addOperand(MCOperand::CreateReg(Reg)); 2138 TmpInst.addOperand(Inst.getOperand(0)); 2139 Inst = TmpInst; 2140 return true; 2141} 2142 2143static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, 2144 bool isCmp = false) { 2145 if (!Inst.getOperand(0).isImm() || 2146 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 2147 return false; 2148 2149 return convertToSExti8(Inst, Opcode, X86::AX, isCmp); 2150} 2151 2152static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, 2153 bool isCmp = false) { 2154 if (!Inst.getOperand(0).isImm() || 2155 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 2156 return false; 2157 2158 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); 2159} 2160 2161static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, 2162 bool isCmp = false) { 2163 if (!Inst.getOperand(0).isImm() || 2164 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 2165 return false; 2166 2167 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); 2168} 2169 2170bool X86AsmParser:: 2171processInstruction(MCInst &Inst, 2172 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { 2173 switch (Inst.getOpcode()) { 2174 default: return false; 2175 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); 2176 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); 2177 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); 2178 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); 2179 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); 2180 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); 2181 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); 2182 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); 2183 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); 2184 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); 2185 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); 2186 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); 2187 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); 2188 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); 2189 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); 2190 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); 2191 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); 2192 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); 2193 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); 2194 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); 2195 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); 2196 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); 2197 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); 2198 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); 2199 case X86::VMOVAPDrr: 2200 case X86::VMOVAPDYrr: 2201 case X86::VMOVAPSrr: 2202 case X86::VMOVAPSYrr: 2203 case X86::VMOVDQArr: 2204 case X86::VMOVDQAYrr: 2205 case X86::VMOVDQUrr: 2206 case X86::VMOVDQUYrr: 2207 case X86::VMOVUPDrr: 2208 case X86::VMOVUPDYrr: 2209 case X86::VMOVUPSrr: 2210 case X86::VMOVUPSYrr: { 2211 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) || 2212 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg())) 2213 return false; 2214 2215 unsigned NewOpc; 2216 switch (Inst.getOpcode()) { 2217 default: llvm_unreachable("Invalid opcode"); 2218 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; 2219 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; 2220 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; 2221 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; 2222 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; 2223 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; 2224 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; 2225 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; 2226 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; 2227 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; 2228 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; 2229 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; 2230 } 2231 Inst.setOpcode(NewOpc); 2232 return true; 2233 } 2234 case X86::VMOVSDrr: 2235 case X86::VMOVSSrr: { 2236 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) || 2237 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg())) 2238 return false; 2239 unsigned NewOpc; 2240 switch (Inst.getOpcode()) { 2241 default: llvm_unreachable("Invalid opcode"); 2242 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; 2243 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; 2244 } 2245 Inst.setOpcode(NewOpc); 2246 return true; 2247 } 2248 } 2249} 2250 2251static const char *getSubtargetFeatureName(unsigned Val); 2252 2253void X86AsmParser::EmitInstruction( 2254 MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, 2255 MCStreamer &Out) { 2256 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), Out); 2257 Out.EmitInstruction(Inst, STI); 2258} 2259 2260bool X86AsmParser:: 2261MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2262 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 2263 MCStreamer &Out, unsigned &ErrorInfo, 2264 bool MatchingInlineAsm) { 2265 assert(!Operands.empty() && "Unexpect empty operand list!"); 2266 X86Operand *Op = static_cast<X86Operand*>(Operands[0]); 2267 assert(Op->isToken() && "Leading operand should always be a mnemonic!"); 2268 ArrayRef<SMRange> EmptyRanges = None; 2269 2270 // First, handle aliases that expand to multiple instructions. 2271 // FIXME: This should be replaced with a real .td file alias mechanism. 2272 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 2273 // call. 2274 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || 2275 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || 2276 Op->getToken() == "finit" || Op->getToken() == "fsave" || 2277 Op->getToken() == "fstenv" || Op->getToken() == "fclex") { 2278 MCInst Inst; 2279 Inst.setOpcode(X86::WAIT); 2280 Inst.setLoc(IDLoc); 2281 if (!MatchingInlineAsm) 2282 EmitInstruction(Inst, Operands, Out); 2283 2284 const char *Repl = 2285 StringSwitch<const char*>(Op->getToken()) 2286 .Case("finit", "fninit") 2287 .Case("fsave", "fnsave") 2288 .Case("fstcw", "fnstcw") 2289 .Case("fstcww", "fnstcw") 2290 .Case("fstenv", "fnstenv") 2291 .Case("fstsw", "fnstsw") 2292 .Case("fstsww", "fnstsw") 2293 .Case("fclex", "fnclex") 2294 .Default(0); 2295 assert(Repl && "Unknown wait-prefixed instruction"); 2296 delete Operands[0]; 2297 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 2298 } 2299 2300 bool WasOriginallyInvalidOperand = false; 2301 MCInst Inst; 2302 2303 // First, try a direct match. 2304 switch (MatchInstructionImpl(Operands, Inst, 2305 ErrorInfo, MatchingInlineAsm, 2306 isParsingIntelSyntax())) { 2307 default: break; 2308 case Match_Success: 2309 // Some instructions need post-processing to, for example, tweak which 2310 // encoding is selected. Loop on it while changes happen so the 2311 // individual transformations can chain off each other. 2312 if (!MatchingInlineAsm) 2313 while (processInstruction(Inst, Operands)) 2314 ; 2315 2316 Inst.setLoc(IDLoc); 2317 if (!MatchingInlineAsm) 2318 EmitInstruction(Inst, Operands, Out); 2319 Opcode = Inst.getOpcode(); 2320 return false; 2321 case Match_MissingFeature: { 2322 assert(ErrorInfo && "Unknown missing feature!"); 2323 // Special case the error message for the very common case where only 2324 // a single subtarget feature is missing. 2325 std::string Msg = "instruction requires:"; 2326 unsigned Mask = 1; 2327 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { 2328 if (ErrorInfo & Mask) { 2329 Msg += " "; 2330 Msg += getSubtargetFeatureName(ErrorInfo & Mask); 2331 } 2332 Mask <<= 1; 2333 } 2334 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); 2335 } 2336 case Match_InvalidOperand: 2337 WasOriginallyInvalidOperand = true; 2338 break; 2339 case Match_MnemonicFail: 2340 break; 2341 } 2342 2343 // FIXME: Ideally, we would only attempt suffix matches for things which are 2344 // valid prefixes, and we could just infer the right unambiguous 2345 // type. However, that requires substantially more matcher support than the 2346 // following hack. 2347 2348 // Change the operand to point to a temporary token. 2349 StringRef Base = Op->getToken(); 2350 SmallString<16> Tmp; 2351 Tmp += Base; 2352 Tmp += ' '; 2353 Op->setTokenValue(Tmp.str()); 2354 2355 // If this instruction starts with an 'f', then it is a floating point stack 2356 // instruction. These come in up to three forms for 32-bit, 64-bit, and 2357 // 80-bit floating point, which use the suffixes s,l,t respectively. 2358 // 2359 // Otherwise, we assume that this may be an integer instruction, which comes 2360 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 2361 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 2362 2363 // Check for the various suffix matches. 2364 Tmp[Base.size()] = Suffixes[0]; 2365 unsigned ErrorInfoIgnore; 2366 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings. 2367 unsigned Match1, Match2, Match3, Match4; 2368 2369 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2370 MatchingInlineAsm, isParsingIntelSyntax()); 2371 // If this returned as a missing feature failure, remember that. 2372 if (Match1 == Match_MissingFeature) 2373 ErrorInfoMissingFeature = ErrorInfoIgnore; 2374 Tmp[Base.size()] = Suffixes[1]; 2375 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2376 MatchingInlineAsm, isParsingIntelSyntax()); 2377 // If this returned as a missing feature failure, remember that. 2378 if (Match2 == Match_MissingFeature) 2379 ErrorInfoMissingFeature = ErrorInfoIgnore; 2380 Tmp[Base.size()] = Suffixes[2]; 2381 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2382 MatchingInlineAsm, isParsingIntelSyntax()); 2383 // If this returned as a missing feature failure, remember that. 2384 if (Match3 == Match_MissingFeature) 2385 ErrorInfoMissingFeature = ErrorInfoIgnore; 2386 Tmp[Base.size()] = Suffixes[3]; 2387 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2388 MatchingInlineAsm, isParsingIntelSyntax()); 2389 // If this returned as a missing feature failure, remember that. 2390 if (Match4 == Match_MissingFeature) 2391 ErrorInfoMissingFeature = ErrorInfoIgnore; 2392 2393 // Restore the old token. 2394 Op->setTokenValue(Base); 2395 2396 // If exactly one matched, then we treat that as a successful match (and the 2397 // instruction will already have been filled in correctly, since the failing 2398 // matches won't have modified it). 2399 unsigned NumSuccessfulMatches = 2400 (Match1 == Match_Success) + (Match2 == Match_Success) + 2401 (Match3 == Match_Success) + (Match4 == Match_Success); 2402 if (NumSuccessfulMatches == 1) { 2403 Inst.setLoc(IDLoc); 2404 if (!MatchingInlineAsm) 2405 EmitInstruction(Inst, Operands, Out); 2406 Opcode = Inst.getOpcode(); 2407 return false; 2408 } 2409 2410 // Otherwise, the match failed, try to produce a decent error message. 2411 2412 // If we had multiple suffix matches, then identify this as an ambiguous 2413 // match. 2414 if (NumSuccessfulMatches > 1) { 2415 char MatchChars[4]; 2416 unsigned NumMatches = 0; 2417 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; 2418 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; 2419 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; 2420 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; 2421 2422 SmallString<126> Msg; 2423 raw_svector_ostream OS(Msg); 2424 OS << "ambiguous instructions require an explicit suffix (could be "; 2425 for (unsigned i = 0; i != NumMatches; ++i) { 2426 if (i != 0) 2427 OS << ", "; 2428 if (i + 1 == NumMatches) 2429 OS << "or "; 2430 OS << "'" << Base << MatchChars[i] << "'"; 2431 } 2432 OS << ")"; 2433 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); 2434 return true; 2435 } 2436 2437 // Okay, we know that none of the variants matched successfully. 2438 2439 // If all of the instructions reported an invalid mnemonic, then the original 2440 // mnemonic was invalid. 2441 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && 2442 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { 2443 if (!WasOriginallyInvalidOperand) { 2444 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges : 2445 Op->getLocRange(); 2446 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 2447 Ranges, MatchingInlineAsm); 2448 } 2449 2450 // Recover location info for the operand if we know which was the problem. 2451 if (ErrorInfo != ~0U) { 2452 if (ErrorInfo >= Operands.size()) 2453 return Error(IDLoc, "too few operands for instruction", 2454 EmptyRanges, MatchingInlineAsm); 2455 2456 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; 2457 if (Operand->getStartLoc().isValid()) { 2458 SMRange OperandRange = Operand->getLocRange(); 2459 return Error(Operand->getStartLoc(), "invalid operand for instruction", 2460 OperandRange, MatchingInlineAsm); 2461 } 2462 } 2463 2464 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2465 MatchingInlineAsm); 2466 } 2467 2468 // If one instruction matched with a missing feature, report this as a 2469 // missing feature. 2470 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + 2471 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ 2472 std::string Msg = "instruction requires:"; 2473 unsigned Mask = 1; 2474 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) { 2475 if (ErrorInfoMissingFeature & Mask) { 2476 Msg += " "; 2477 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask); 2478 } 2479 Mask <<= 1; 2480 } 2481 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); 2482 } 2483 2484 // If one instruction matched with an invalid operand, report this as an 2485 // operand failure. 2486 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + 2487 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ 2488 Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2489 MatchingInlineAsm); 2490 return true; 2491 } 2492 2493 // If all of these were an outright failure, report it in a useless way. 2494 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 2495 EmptyRanges, MatchingInlineAsm); 2496 return true; 2497} 2498 2499 2500bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 2501 StringRef IDVal = DirectiveID.getIdentifier(); 2502 if (IDVal == ".word") 2503 return ParseDirectiveWord(2, DirectiveID.getLoc()); 2504 else if (IDVal.startswith(".code")) 2505 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 2506 else if (IDVal.startswith(".att_syntax")) { 2507 getParser().setAssemblerDialect(0); 2508 return false; 2509 } else if (IDVal.startswith(".intel_syntax")) { 2510 getParser().setAssemblerDialect(1); 2511 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2512 // FIXME: Handle noprefix 2513 if (Parser.getTok().getString() == "noprefix") 2514 Parser.Lex(); 2515 } 2516 return false; 2517 } 2518 return true; 2519} 2520 2521/// ParseDirectiveWord 2522/// ::= .word [ expression (, expression)* ] 2523bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 2524 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2525 for (;;) { 2526 const MCExpr *Value; 2527 if (getParser().parseExpression(Value)) 2528 return false; 2529 2530 getParser().getStreamer().EmitValue(Value, Size); 2531 2532 if (getLexer().is(AsmToken::EndOfStatement)) 2533 break; 2534 2535 // FIXME: Improve diagnostic. 2536 if (getLexer().isNot(AsmToken::Comma)) { 2537 Error(L, "unexpected token in directive"); 2538 return false; 2539 } 2540 Parser.Lex(); 2541 } 2542 } 2543 2544 Parser.Lex(); 2545 return false; 2546} 2547 2548/// ParseDirectiveCode 2549/// ::= .code16 | .code32 | .code64 2550bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 2551 if (IDVal == ".code16") { 2552 Parser.Lex(); 2553 if (!is16BitMode()) { 2554 SwitchMode(X86::Mode16Bit); 2555 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 2556 } 2557 } else if (IDVal == ".code32") { 2558 Parser.Lex(); 2559 if (!is32BitMode()) { 2560 SwitchMode(X86::Mode32Bit); 2561 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 2562 } 2563 } else if (IDVal == ".code64") { 2564 Parser.Lex(); 2565 if (!is64BitMode()) { 2566 SwitchMode(X86::Mode64Bit); 2567 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 2568 } 2569 } else { 2570 Error(L, "unknown directive " + IDVal); 2571 return false; 2572 } 2573 2574 return false; 2575} 2576 2577// Force static initialization. 2578extern "C" void LLVMInitializeX86AsmParser() { 2579 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); 2580 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); 2581} 2582 2583#define GET_REGISTER_MATCHER 2584#define GET_MATCHER_IMPLEMENTATION 2585#define GET_SUBTARGET_FEATURE_NAME 2586#include "X86GenAsmMatcher.inc" 2587