1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "MCTargetDesc/X86BaseInfo.h" 11#include "X86AsmInstrumentation.h" 12#include "X86AsmParserCommon.h" 13#include "X86Operand.h" 14#include "X86ISelLowering.h" 15#include "llvm/ADT/APFloat.h" 16#include "llvm/ADT/STLExtras.h" 17#include "llvm/ADT/SmallString.h" 18#include "llvm/ADT/SmallVector.h" 19#include "llvm/ADT/StringSwitch.h" 20#include "llvm/ADT/Twine.h" 21#include "llvm/MC/MCContext.h" 22#include "llvm/MC/MCExpr.h" 23#include "llvm/MC/MCInst.h" 24#include "llvm/MC/MCInstrInfo.h" 25#include "llvm/MC/MCParser/MCAsmLexer.h" 26#include "llvm/MC/MCParser/MCAsmParser.h" 27#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 28#include "llvm/MC/MCRegisterInfo.h" 29#include "llvm/MC/MCSection.h" 30#include "llvm/MC/MCStreamer.h" 31#include "llvm/MC/MCSubtargetInfo.h" 32#include "llvm/MC/MCSymbol.h" 33#include "llvm/MC/MCTargetAsmParser.h" 34#include "llvm/Support/SourceMgr.h" 35#include "llvm/Support/TargetRegistry.h" 36#include "llvm/Support/raw_ostream.h" 37#include <algorithm> 38#include <memory> 39 40using namespace llvm; 41 42namespace { 43 44static const char OpPrecedence[] = { 45 0, // IC_OR 46 1, // IC_XOR 47 2, // IC_AND 48 3, // IC_LSHIFT 49 3, // IC_RSHIFT 50 4, // IC_PLUS 51 4, // IC_MINUS 52 5, // IC_MULTIPLY 53 5, // IC_DIVIDE 54 6, // IC_RPAREN 55 7, // IC_LPAREN 56 0, // IC_IMM 57 0 // IC_REGISTER 58}; 59 60class X86AsmParser : public MCTargetAsmParser { 61 const MCInstrInfo &MII; 62 ParseInstructionInfo *InstInfo; 63 std::unique_ptr<X86AsmInstrumentation> Instrumentation; 64 65private: 66 SMLoc consumeToken() { 67 MCAsmParser &Parser = getParser(); 68 SMLoc Result = Parser.getTok().getLoc(); 69 Parser.Lex(); 70 return Result; 71 } 72 73 enum InfixCalculatorTok { 74 IC_OR = 0, 75 IC_XOR, 76 IC_AND, 77 IC_LSHIFT, 78 IC_RSHIFT, 79 IC_PLUS, 80 IC_MINUS, 81 IC_MULTIPLY, 82 IC_DIVIDE, 83 IC_RPAREN, 84 IC_LPAREN, 85 IC_IMM, 86 IC_REGISTER 87 }; 88 89 class InfixCalculator { 90 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 91 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 92 SmallVector<ICToken, 4> PostfixStack; 93 94 public: 95 int64_t popOperand() { 96 assert (!PostfixStack.empty() && "Poped an empty stack!"); 97 ICToken Op = PostfixStack.pop_back_val(); 98 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) 99 && "Expected and immediate or register!"); 100 return Op.second; 101 } 102 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 103 assert ((Op == IC_IMM || Op == IC_REGISTER) && 104 "Unexpected operand!"); 105 PostfixStack.push_back(std::make_pair(Op, Val)); 106 } 107 108 void popOperator() { InfixOperatorStack.pop_back(); } 109 void pushOperator(InfixCalculatorTok Op) { 110 // Push the new operator if the stack is empty. 111 if (InfixOperatorStack.empty()) { 112 InfixOperatorStack.push_back(Op); 113 return; 114 } 115 116 // Push the new operator if it has a higher precedence than the operator 117 // on the top of the stack or the operator on the top of the stack is a 118 // left parentheses. 119 unsigned Idx = InfixOperatorStack.size() - 1; 120 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 121 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 122 InfixOperatorStack.push_back(Op); 123 return; 124 } 125 126 // The operator on the top of the stack has higher precedence than the 127 // new operator. 128 unsigned ParenCount = 0; 129 while (1) { 130 // Nothing to process. 131 if (InfixOperatorStack.empty()) 132 break; 133 134 Idx = InfixOperatorStack.size() - 1; 135 StackOp = InfixOperatorStack[Idx]; 136 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 137 break; 138 139 // If we have an even parentheses count and we see a left parentheses, 140 // then stop processing. 141 if (!ParenCount && StackOp == IC_LPAREN) 142 break; 143 144 if (StackOp == IC_RPAREN) { 145 ++ParenCount; 146 InfixOperatorStack.pop_back(); 147 } else if (StackOp == IC_LPAREN) { 148 --ParenCount; 149 InfixOperatorStack.pop_back(); 150 } else { 151 InfixOperatorStack.pop_back(); 152 PostfixStack.push_back(std::make_pair(StackOp, 0)); 153 } 154 } 155 // Push the new operator. 156 InfixOperatorStack.push_back(Op); 157 } 158 159 int64_t execute() { 160 // Push any remaining operators onto the postfix stack. 161 while (!InfixOperatorStack.empty()) { 162 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 163 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 164 PostfixStack.push_back(std::make_pair(StackOp, 0)); 165 } 166 167 if (PostfixStack.empty()) 168 return 0; 169 170 SmallVector<ICToken, 16> OperandStack; 171 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 172 ICToken Op = PostfixStack[i]; 173 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 174 OperandStack.push_back(Op); 175 } else { 176 assert (OperandStack.size() > 1 && "Too few operands."); 177 int64_t Val; 178 ICToken Op2 = OperandStack.pop_back_val(); 179 ICToken Op1 = OperandStack.pop_back_val(); 180 switch (Op.first) { 181 default: 182 report_fatal_error("Unexpected operator!"); 183 break; 184 case IC_PLUS: 185 Val = Op1.second + Op2.second; 186 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 187 break; 188 case IC_MINUS: 189 Val = Op1.second - Op2.second; 190 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 191 break; 192 case IC_MULTIPLY: 193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 194 "Multiply operation with an immediate and a register!"); 195 Val = Op1.second * Op2.second; 196 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 197 break; 198 case IC_DIVIDE: 199 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 200 "Divide operation with an immediate and a register!"); 201 assert (Op2.second != 0 && "Division by zero!"); 202 Val = Op1.second / Op2.second; 203 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 204 break; 205 case IC_OR: 206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 207 "Or operation with an immediate and a register!"); 208 Val = Op1.second | Op2.second; 209 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 210 break; 211 case IC_XOR: 212 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 213 "Xor operation with an immediate and a register!"); 214 Val = Op1.second ^ Op2.second; 215 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 216 break; 217 case IC_AND: 218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 219 "And operation with an immediate and a register!"); 220 Val = Op1.second & Op2.second; 221 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 222 break; 223 case IC_LSHIFT: 224 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 225 "Left shift operation with an immediate and a register!"); 226 Val = Op1.second << Op2.second; 227 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 228 break; 229 case IC_RSHIFT: 230 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 231 "Right shift operation with an immediate and a register!"); 232 Val = Op1.second >> Op2.second; 233 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 234 break; 235 } 236 } 237 } 238 assert (OperandStack.size() == 1 && "Expected a single result."); 239 return OperandStack.pop_back_val().second; 240 } 241 }; 242 243 enum IntelExprState { 244 IES_OR, 245 IES_XOR, 246 IES_AND, 247 IES_LSHIFT, 248 IES_RSHIFT, 249 IES_PLUS, 250 IES_MINUS, 251 IES_NOT, 252 IES_MULTIPLY, 253 IES_DIVIDE, 254 IES_LBRAC, 255 IES_RBRAC, 256 IES_LPAREN, 257 IES_RPAREN, 258 IES_REGISTER, 259 IES_INTEGER, 260 IES_IDENTIFIER, 261 IES_ERROR 262 }; 263 264 class IntelExprStateMachine { 265 IntelExprState State, PrevState; 266 unsigned BaseReg, IndexReg, TmpReg, Scale; 267 int64_t Imm; 268 const MCExpr *Sym; 269 StringRef SymName; 270 bool StopOnLBrac, AddImmPrefix; 271 InfixCalculator IC; 272 InlineAsmIdentifierInfo Info; 273 274 public: 275 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : 276 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), 277 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac), 278 AddImmPrefix(addimmprefix) { Info.clear(); } 279 280 unsigned getBaseReg() { return BaseReg; } 281 unsigned getIndexReg() { return IndexReg; } 282 unsigned getScale() { return Scale; } 283 const MCExpr *getSym() { return Sym; } 284 StringRef getSymName() { return SymName; } 285 int64_t getImm() { return Imm + IC.execute(); } 286 bool isValidEndState() { 287 return State == IES_RBRAC || State == IES_INTEGER; 288 } 289 bool getStopOnLBrac() { return StopOnLBrac; } 290 bool getAddImmPrefix() { return AddImmPrefix; } 291 bool hadError() { return State == IES_ERROR; } 292 293 InlineAsmIdentifierInfo &getIdentifierInfo() { 294 return Info; 295 } 296 297 void onOr() { 298 IntelExprState CurrState = State; 299 switch (State) { 300 default: 301 State = IES_ERROR; 302 break; 303 case IES_INTEGER: 304 case IES_RPAREN: 305 case IES_REGISTER: 306 State = IES_OR; 307 IC.pushOperator(IC_OR); 308 break; 309 } 310 PrevState = CurrState; 311 } 312 void onXor() { 313 IntelExprState CurrState = State; 314 switch (State) { 315 default: 316 State = IES_ERROR; 317 break; 318 case IES_INTEGER: 319 case IES_RPAREN: 320 case IES_REGISTER: 321 State = IES_XOR; 322 IC.pushOperator(IC_XOR); 323 break; 324 } 325 PrevState = CurrState; 326 } 327 void onAnd() { 328 IntelExprState CurrState = State; 329 switch (State) { 330 default: 331 State = IES_ERROR; 332 break; 333 case IES_INTEGER: 334 case IES_RPAREN: 335 case IES_REGISTER: 336 State = IES_AND; 337 IC.pushOperator(IC_AND); 338 break; 339 } 340 PrevState = CurrState; 341 } 342 void onLShift() { 343 IntelExprState CurrState = State; 344 switch (State) { 345 default: 346 State = IES_ERROR; 347 break; 348 case IES_INTEGER: 349 case IES_RPAREN: 350 case IES_REGISTER: 351 State = IES_LSHIFT; 352 IC.pushOperator(IC_LSHIFT); 353 break; 354 } 355 PrevState = CurrState; 356 } 357 void onRShift() { 358 IntelExprState CurrState = State; 359 switch (State) { 360 default: 361 State = IES_ERROR; 362 break; 363 case IES_INTEGER: 364 case IES_RPAREN: 365 case IES_REGISTER: 366 State = IES_RSHIFT; 367 IC.pushOperator(IC_RSHIFT); 368 break; 369 } 370 PrevState = CurrState; 371 } 372 void onPlus() { 373 IntelExprState CurrState = State; 374 switch (State) { 375 default: 376 State = IES_ERROR; 377 break; 378 case IES_INTEGER: 379 case IES_RPAREN: 380 case IES_REGISTER: 381 State = IES_PLUS; 382 IC.pushOperator(IC_PLUS); 383 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 384 // If we already have a BaseReg, then assume this is the IndexReg with 385 // a scale of 1. 386 if (!BaseReg) { 387 BaseReg = TmpReg; 388 } else { 389 assert (!IndexReg && "BaseReg/IndexReg already set!"); 390 IndexReg = TmpReg; 391 Scale = 1; 392 } 393 } 394 break; 395 } 396 PrevState = CurrState; 397 } 398 void onMinus() { 399 IntelExprState CurrState = State; 400 switch (State) { 401 default: 402 State = IES_ERROR; 403 break; 404 case IES_PLUS: 405 case IES_NOT: 406 case IES_MULTIPLY: 407 case IES_DIVIDE: 408 case IES_LPAREN: 409 case IES_RPAREN: 410 case IES_LBRAC: 411 case IES_RBRAC: 412 case IES_INTEGER: 413 case IES_REGISTER: 414 State = IES_MINUS; 415 // Only push the minus operator if it is not a unary operator. 416 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || 417 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || 418 CurrState == IES_LPAREN || CurrState == IES_LBRAC)) 419 IC.pushOperator(IC_MINUS); 420 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 421 // If we already have a BaseReg, then assume this is the IndexReg with 422 // a scale of 1. 423 if (!BaseReg) { 424 BaseReg = TmpReg; 425 } else { 426 assert (!IndexReg && "BaseReg/IndexReg already set!"); 427 IndexReg = TmpReg; 428 Scale = 1; 429 } 430 } 431 break; 432 } 433 PrevState = CurrState; 434 } 435 void onNot() { 436 IntelExprState CurrState = State; 437 switch (State) { 438 default: 439 State = IES_ERROR; 440 break; 441 case IES_PLUS: 442 case IES_NOT: 443 State = IES_NOT; 444 break; 445 } 446 PrevState = CurrState; 447 } 448 void onRegister(unsigned Reg) { 449 IntelExprState CurrState = State; 450 switch (State) { 451 default: 452 State = IES_ERROR; 453 break; 454 case IES_PLUS: 455 case IES_LPAREN: 456 State = IES_REGISTER; 457 TmpReg = Reg; 458 IC.pushOperand(IC_REGISTER); 459 break; 460 case IES_MULTIPLY: 461 // Index Register - Scale * Register 462 if (PrevState == IES_INTEGER) { 463 assert (!IndexReg && "IndexReg already set!"); 464 State = IES_REGISTER; 465 IndexReg = Reg; 466 // Get the scale and replace the 'Scale * Register' with '0'. 467 Scale = IC.popOperand(); 468 IC.pushOperand(IC_IMM); 469 IC.popOperator(); 470 } else { 471 State = IES_ERROR; 472 } 473 break; 474 } 475 PrevState = CurrState; 476 } 477 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { 478 PrevState = State; 479 switch (State) { 480 default: 481 State = IES_ERROR; 482 break; 483 case IES_PLUS: 484 case IES_MINUS: 485 case IES_NOT: 486 State = IES_INTEGER; 487 Sym = SymRef; 488 SymName = SymRefName; 489 IC.pushOperand(IC_IMM); 490 break; 491 } 492 } 493 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { 494 IntelExprState CurrState = State; 495 switch (State) { 496 default: 497 State = IES_ERROR; 498 break; 499 case IES_PLUS: 500 case IES_MINUS: 501 case IES_NOT: 502 case IES_OR: 503 case IES_XOR: 504 case IES_AND: 505 case IES_LSHIFT: 506 case IES_RSHIFT: 507 case IES_DIVIDE: 508 case IES_MULTIPLY: 509 case IES_LPAREN: 510 State = IES_INTEGER; 511 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 512 // Index Register - Register * Scale 513 assert (!IndexReg && "IndexReg already set!"); 514 IndexReg = TmpReg; 515 Scale = TmpInt; 516 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { 517 ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; 518 return true; 519 } 520 // Get the scale and replace the 'Register * Scale' with '0'. 521 IC.popOperator(); 522 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 523 PrevState == IES_OR || PrevState == IES_AND || 524 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 525 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 526 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 527 PrevState == IES_NOT || PrevState == IES_XOR) && 528 CurrState == IES_MINUS) { 529 // Unary minus. No need to pop the minus operand because it was never 530 // pushed. 531 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 532 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 533 PrevState == IES_OR || PrevState == IES_AND || 534 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 535 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 536 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 537 PrevState == IES_NOT || PrevState == IES_XOR) && 538 CurrState == IES_NOT) { 539 // Unary not. No need to pop the not operand because it was never 540 // pushed. 541 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. 542 } else { 543 IC.pushOperand(IC_IMM, TmpInt); 544 } 545 break; 546 } 547 PrevState = CurrState; 548 return false; 549 } 550 void onStar() { 551 PrevState = State; 552 switch (State) { 553 default: 554 State = IES_ERROR; 555 break; 556 case IES_INTEGER: 557 case IES_REGISTER: 558 case IES_RPAREN: 559 State = IES_MULTIPLY; 560 IC.pushOperator(IC_MULTIPLY); 561 break; 562 } 563 } 564 void onDivide() { 565 PrevState = State; 566 switch (State) { 567 default: 568 State = IES_ERROR; 569 break; 570 case IES_INTEGER: 571 case IES_RPAREN: 572 State = IES_DIVIDE; 573 IC.pushOperator(IC_DIVIDE); 574 break; 575 } 576 } 577 void onLBrac() { 578 PrevState = State; 579 switch (State) { 580 default: 581 State = IES_ERROR; 582 break; 583 case IES_RBRAC: 584 State = IES_PLUS; 585 IC.pushOperator(IC_PLUS); 586 break; 587 } 588 } 589 void onRBrac() { 590 IntelExprState CurrState = State; 591 switch (State) { 592 default: 593 State = IES_ERROR; 594 break; 595 case IES_INTEGER: 596 case IES_REGISTER: 597 case IES_RPAREN: 598 State = IES_RBRAC; 599 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 600 // If we already have a BaseReg, then assume this is the IndexReg with 601 // a scale of 1. 602 if (!BaseReg) { 603 BaseReg = TmpReg; 604 } else { 605 assert (!IndexReg && "BaseReg/IndexReg already set!"); 606 IndexReg = TmpReg; 607 Scale = 1; 608 } 609 } 610 break; 611 } 612 PrevState = CurrState; 613 } 614 void onLParen() { 615 IntelExprState CurrState = State; 616 switch (State) { 617 default: 618 State = IES_ERROR; 619 break; 620 case IES_PLUS: 621 case IES_MINUS: 622 case IES_NOT: 623 case IES_OR: 624 case IES_XOR: 625 case IES_AND: 626 case IES_LSHIFT: 627 case IES_RSHIFT: 628 case IES_MULTIPLY: 629 case IES_DIVIDE: 630 case IES_LPAREN: 631 // FIXME: We don't handle this type of unary minus or not, yet. 632 if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 633 PrevState == IES_OR || PrevState == IES_AND || 634 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 635 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 636 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 637 PrevState == IES_NOT || PrevState == IES_XOR) && 638 (CurrState == IES_MINUS || CurrState == IES_NOT)) { 639 State = IES_ERROR; 640 break; 641 } 642 State = IES_LPAREN; 643 IC.pushOperator(IC_LPAREN); 644 break; 645 } 646 PrevState = CurrState; 647 } 648 void onRParen() { 649 PrevState = State; 650 switch (State) { 651 default: 652 State = IES_ERROR; 653 break; 654 case IES_INTEGER: 655 case IES_REGISTER: 656 case IES_RPAREN: 657 State = IES_RPAREN; 658 IC.pushOperator(IC_RPAREN); 659 break; 660 } 661 } 662 }; 663 664 bool Error(SMLoc L, const Twine &Msg, 665 ArrayRef<SMRange> Ranges = None, 666 bool MatchingInlineAsm = false) { 667 MCAsmParser &Parser = getParser(); 668 if (MatchingInlineAsm) return true; 669 return Parser.Error(L, Msg, Ranges); 670 } 671 672 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg, 673 ArrayRef<SMRange> Ranges = None, 674 bool MatchingInlineAsm = false) { 675 MCAsmParser &Parser = getParser(); 676 Parser.eatToEndOfStatement(); 677 return Error(L, Msg, Ranges, MatchingInlineAsm); 678 } 679 680 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) { 681 Error(Loc, Msg); 682 return nullptr; 683 } 684 685 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc); 686 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc); 687 void AddDefaultSrcDestOperands( 688 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 689 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst); 690 std::unique_ptr<X86Operand> ParseOperand(); 691 std::unique_ptr<X86Operand> ParseATTOperand(); 692 std::unique_ptr<X86Operand> ParseIntelOperand(); 693 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); 694 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); 695 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind); 696 std::unique_ptr<X86Operand> 697 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); 698 std::unique_ptr<X86Operand> 699 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size); 700 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End); 701 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 702 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg, 703 SMLoc Start, 704 int64_t ImmDisp, 705 unsigned Size); 706 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, 707 InlineAsmIdentifierInfo &Info, 708 bool IsUnevaluatedOperand, SMLoc &End); 709 710 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 711 712 std::unique_ptr<X86Operand> 713 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, 714 unsigned IndexReg, unsigned Scale, SMLoc Start, 715 SMLoc End, unsigned Size, StringRef Identifier, 716 InlineAsmIdentifierInfo &Info); 717 718 bool parseDirectiveEven(SMLoc L); 719 bool ParseDirectiveWord(unsigned Size, SMLoc L); 720 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 721 722 bool processInstruction(MCInst &Inst, const OperandVector &Ops); 723 724 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds 725 /// instrumentation around Inst. 726 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); 727 728 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 729 OperandVector &Operands, MCStreamer &Out, 730 uint64_t &ErrorInfo, 731 bool MatchingInlineAsm) override; 732 733 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands, 734 MCStreamer &Out, bool MatchingInlineAsm); 735 736 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, 737 bool MatchingInlineAsm); 738 739 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, 740 OperandVector &Operands, MCStreamer &Out, 741 uint64_t &ErrorInfo, 742 bool MatchingInlineAsm); 743 744 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, 745 OperandVector &Operands, MCStreamer &Out, 746 uint64_t &ErrorInfo, 747 bool MatchingInlineAsm); 748 749 bool OmitRegisterFromClobberLists(unsigned RegNo) override; 750 751 /// doSrcDstMatch - Returns true if operands are matching in their 752 /// word size (%si and %di, %esi and %edi, etc.). Order depends on 753 /// the parsing mode (Intel vs. AT&T). 754 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2); 755 756 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) 757 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 758 /// \return \c true if no parsing errors occurred, \c false otherwise. 759 bool HandleAVX512Operand(OperandVector &Operands, 760 const MCParsedAsmOperand &Op); 761 762 bool is64BitMode() const { 763 // FIXME: Can tablegen auto-generate this? 764 return getSTI().getFeatureBits()[X86::Mode64Bit]; 765 } 766 bool is32BitMode() const { 767 // FIXME: Can tablegen auto-generate this? 768 return getSTI().getFeatureBits()[X86::Mode32Bit]; 769 } 770 bool is16BitMode() const { 771 // FIXME: Can tablegen auto-generate this? 772 return getSTI().getFeatureBits()[X86::Mode16Bit]; 773 } 774 void SwitchMode(unsigned mode) { 775 MCSubtargetInfo &STI = copySTI(); 776 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); 777 FeatureBitset OldMode = STI.getFeatureBits() & AllModes; 778 unsigned FB = ComputeAvailableFeatures( 779 STI.ToggleFeature(OldMode.flip(mode))); 780 setAvailableFeatures(FB); 781 782 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); 783 } 784 785 unsigned getPointerWidth() { 786 if (is16BitMode()) return 16; 787 if (is32BitMode()) return 32; 788 if (is64BitMode()) return 64; 789 llvm_unreachable("invalid mode"); 790 } 791 792 bool isParsingIntelSyntax() { 793 return getParser().getAssemblerDialect(); 794 } 795 796 /// @name Auto-generated Matcher Functions 797 /// { 798 799#define GET_ASSEMBLER_HEADER 800#include "X86GenAsmMatcher.inc" 801 802 /// } 803 804public: 805 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, 806 const MCInstrInfo &mii, const MCTargetOptions &Options) 807 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) { 808 809 // Initialize the set of available features. 810 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); 811 Instrumentation.reset( 812 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); 813 } 814 815 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 816 817 void SetFrameRegister(unsigned RegNo) override; 818 819 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 820 SMLoc NameLoc, OperandVector &Operands) override; 821 822 bool ParseDirective(AsmToken DirectiveID) override; 823}; 824} // end anonymous namespace 825 826/// @name Auto-generated Match Functions 827/// { 828 829static unsigned MatchRegisterName(StringRef Name); 830 831/// } 832 833static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, 834 StringRef &ErrMsg) { 835 // If we have both a base register and an index register make sure they are 836 // both 64-bit or 32-bit registers. 837 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 838 if (BaseReg != 0 && IndexReg != 0) { 839 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 840 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 841 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 842 IndexReg != X86::RIZ) { 843 ErrMsg = "base register is 64-bit, but index register is not"; 844 return true; 845 } 846 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 847 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 848 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 849 IndexReg != X86::EIZ){ 850 ErrMsg = "base register is 32-bit, but index register is not"; 851 return true; 852 } 853 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 854 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 855 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 856 ErrMsg = "base register is 16-bit, but index register is not"; 857 return true; 858 } 859 if (((BaseReg == X86::BX || BaseReg == X86::BP) && 860 IndexReg != X86::SI && IndexReg != X86::DI) || 861 ((BaseReg == X86::SI || BaseReg == X86::DI) && 862 IndexReg != X86::BX && IndexReg != X86::BP)) { 863 ErrMsg = "invalid 16-bit base/index register combination"; 864 return true; 865 } 866 } 867 } 868 return false; 869} 870 871bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2) 872{ 873 // Return true and let a normal complaint about bogus operands happen. 874 if (!Op1.isMem() || !Op2.isMem()) 875 return true; 876 877 // Actually these might be the other way round if Intel syntax is 878 // being used. It doesn't matter. 879 unsigned diReg = Op1.Mem.BaseReg; 880 unsigned siReg = Op2.Mem.BaseReg; 881 882 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg)) 883 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg); 884 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg)) 885 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg); 886 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg)) 887 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg); 888 // Again, return true and let another error happen. 889 return true; 890} 891 892bool X86AsmParser::ParseRegister(unsigned &RegNo, 893 SMLoc &StartLoc, SMLoc &EndLoc) { 894 MCAsmParser &Parser = getParser(); 895 RegNo = 0; 896 const AsmToken &PercentTok = Parser.getTok(); 897 StartLoc = PercentTok.getLoc(); 898 899 // If we encounter a %, ignore it. This code handles registers with and 900 // without the prefix, unprefixed registers can occur in cfi directives. 901 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 902 Parser.Lex(); // Eat percent token. 903 904 const AsmToken &Tok = Parser.getTok(); 905 EndLoc = Tok.getEndLoc(); 906 907 if (Tok.isNot(AsmToken::Identifier)) { 908 if (isParsingIntelSyntax()) return true; 909 return Error(StartLoc, "invalid register name", 910 SMRange(StartLoc, EndLoc)); 911 } 912 913 RegNo = MatchRegisterName(Tok.getString()); 914 915 // If the match failed, try the register name as lowercase. 916 if (RegNo == 0) 917 RegNo = MatchRegisterName(Tok.getString().lower()); 918 919 // The "flags" register cannot be referenced directly. 920 // Treat it as an identifier instead. 921 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS) 922 RegNo = 0; 923 924 if (!is64BitMode()) { 925 // FIXME: This should be done using Requires<Not64BitMode> and 926 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 927 // checked. 928 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 929 // REX prefix. 930 if (RegNo == X86::RIZ || 931 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 932 X86II::isX86_64NonExtLowByteReg(RegNo) || 933 X86II::isX86_64ExtendedReg(RegNo)) 934 return Error(StartLoc, "register %" 935 + Tok.getString() + " is only available in 64-bit mode", 936 SMRange(StartLoc, EndLoc)); 937 } 938 939 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 940 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 941 RegNo = X86::ST0; 942 Parser.Lex(); // Eat 'st' 943 944 // Check to see if we have '(4)' after %st. 945 if (getLexer().isNot(AsmToken::LParen)) 946 return false; 947 // Lex the paren. 948 getParser().Lex(); 949 950 const AsmToken &IntTok = Parser.getTok(); 951 if (IntTok.isNot(AsmToken::Integer)) 952 return Error(IntTok.getLoc(), "expected stack index"); 953 switch (IntTok.getIntVal()) { 954 case 0: RegNo = X86::ST0; break; 955 case 1: RegNo = X86::ST1; break; 956 case 2: RegNo = X86::ST2; break; 957 case 3: RegNo = X86::ST3; break; 958 case 4: RegNo = X86::ST4; break; 959 case 5: RegNo = X86::ST5; break; 960 case 6: RegNo = X86::ST6; break; 961 case 7: RegNo = X86::ST7; break; 962 default: return Error(IntTok.getLoc(), "invalid stack index"); 963 } 964 965 if (getParser().Lex().isNot(AsmToken::RParen)) 966 return Error(Parser.getTok().getLoc(), "expected ')'"); 967 968 EndLoc = Parser.getTok().getEndLoc(); 969 Parser.Lex(); // Eat ')' 970 return false; 971 } 972 973 EndLoc = Parser.getTok().getEndLoc(); 974 975 // If this is "db[0-7]", match it as an alias 976 // for dr[0-7]. 977 if (RegNo == 0 && Tok.getString().size() == 3 && 978 Tok.getString().startswith("db")) { 979 switch (Tok.getString()[2]) { 980 case '0': RegNo = X86::DR0; break; 981 case '1': RegNo = X86::DR1; break; 982 case '2': RegNo = X86::DR2; break; 983 case '3': RegNo = X86::DR3; break; 984 case '4': RegNo = X86::DR4; break; 985 case '5': RegNo = X86::DR5; break; 986 case '6': RegNo = X86::DR6; break; 987 case '7': RegNo = X86::DR7; break; 988 } 989 990 if (RegNo != 0) { 991 EndLoc = Parser.getTok().getEndLoc(); 992 Parser.Lex(); // Eat it. 993 return false; 994 } 995 } 996 997 if (RegNo == 0) { 998 if (isParsingIntelSyntax()) return true; 999 return Error(StartLoc, "invalid register name", 1000 SMRange(StartLoc, EndLoc)); 1001 } 1002 1003 Parser.Lex(); // Eat identifier token. 1004 return false; 1005} 1006 1007void X86AsmParser::SetFrameRegister(unsigned RegNo) { 1008 Instrumentation->SetInitialFrameRegister(RegNo); 1009} 1010 1011std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { 1012 unsigned basereg = 1013 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); 1014 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1015 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1016 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, 1017 Loc, Loc, 0); 1018} 1019 1020std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { 1021 unsigned basereg = 1022 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI); 1023 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1024 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1025 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, 1026 Loc, Loc, 0); 1027} 1028 1029void X86AsmParser::AddDefaultSrcDestOperands( 1030 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 1031 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) { 1032 if (isParsingIntelSyntax()) { 1033 Operands.push_back(std::move(Dst)); 1034 Operands.push_back(std::move(Src)); 1035 } 1036 else { 1037 Operands.push_back(std::move(Src)); 1038 Operands.push_back(std::move(Dst)); 1039 } 1040} 1041 1042std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { 1043 if (isParsingIntelSyntax()) 1044 return ParseIntelOperand(); 1045 return ParseATTOperand(); 1046} 1047 1048/// getIntelMemOperandSize - Return intel memory operand size. 1049static unsigned getIntelMemOperandSize(StringRef OpStr) { 1050 unsigned Size = StringSwitch<unsigned>(OpStr) 1051 .Cases("BYTE", "byte", 8) 1052 .Cases("WORD", "word", 16) 1053 .Cases("DWORD", "dword", 32) 1054 .Cases("FWORD", "fword", 48) 1055 .Cases("QWORD", "qword", 64) 1056 .Cases("MMWORD","mmword", 64) 1057 .Cases("XWORD", "xword", 80) 1058 .Cases("TBYTE", "tbyte", 80) 1059 .Cases("XMMWORD", "xmmword", 128) 1060 .Cases("YMMWORD", "ymmword", 256) 1061 .Cases("ZMMWORD", "zmmword", 512) 1062 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter 1063 .Default(0); 1064 return Size; 1065} 1066 1067std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm( 1068 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, 1069 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, 1070 InlineAsmIdentifierInfo &Info) { 1071 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or 1072 // some other label reference. 1073 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) { 1074 // Insert an explicit size if the user didn't have one. 1075 if (!Size) { 1076 Size = getPointerWidth(); 1077 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, 1078 /*Len=*/0, Size); 1079 } 1080 1081 // Create an absolute memory reference in order to match against 1082 // instructions taking a PC relative operand. 1083 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size, 1084 Identifier, Info.OpDecl); 1085 } 1086 1087 // We either have a direct symbol reference, or an offset from a symbol. The 1088 // parser always puts the symbol on the LHS, so look there for size 1089 // calculation purposes. 1090 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); 1091 bool IsSymRef = 1092 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); 1093 if (IsSymRef) { 1094 if (!Size) { 1095 Size = Info.Type * 8; // Size is in terms of bits in this context. 1096 if (Size) 1097 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, 1098 /*Len=*/0, Size); 1099 } 1100 } 1101 1102 // When parsing inline assembly we set the base register to a non-zero value 1103 // if we don't know the actual value at this time. This is necessary to 1104 // get the matching correct in some cases. 1105 BaseReg = BaseReg ? BaseReg : 1; 1106 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 1107 IndexReg, Scale, Start, End, Size, Identifier, 1108 Info.OpDecl); 1109} 1110 1111static void 1112RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites, 1113 StringRef SymName, int64_t ImmDisp, 1114 int64_t FinalImmDisp, SMLoc &BracLoc, 1115 SMLoc &StartInBrac, SMLoc &End) { 1116 // Remove the '[' and ']' from the IR string. 1117 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1); 1118 AsmRewrites.emplace_back(AOK_Skip, End, 1); 1119 1120 // If ImmDisp is non-zero, then we parsed a displacement before the 1121 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) 1122 // If ImmDisp doesn't match the displacement computed by the state machine 1123 // then we have an additional displacement in the bracketed expression. 1124 if (ImmDisp != FinalImmDisp) { 1125 if (ImmDisp) { 1126 // We have an immediate displacement before the bracketed expression. 1127 // Adjust this to match the final immediate displacement. 1128 bool Found = false; 1129 for (AsmRewrite &AR : AsmRewrites) { 1130 if (AR.Loc.getPointer() > BracLoc.getPointer()) 1131 continue; 1132 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) { 1133 assert (!Found && "ImmDisp already rewritten."); 1134 AR.Kind = AOK_Imm; 1135 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); 1136 AR.Val = FinalImmDisp; 1137 Found = true; 1138 break; 1139 } 1140 } 1141 assert (Found && "Unable to rewrite ImmDisp."); 1142 (void)Found; 1143 } else { 1144 // We have a symbolic and an immediate displacement, but no displacement 1145 // before the bracketed expression. Put the immediate displacement 1146 // before the bracketed expression. 1147 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); 1148 } 1149 } 1150 // Remove all the ImmPrefix rewrites within the brackets. 1151 for (AsmRewrite &AR : AsmRewrites) { 1152 if (AR.Loc.getPointer() < StartInBrac.getPointer()) 1153 continue; 1154 if (AR.Kind == AOK_ImmPrefix) 1155 AR.Kind = AOK_Delete; 1156 } 1157 const char *SymLocPtr = SymName.data(); 1158 // Skip everything before the symbol. 1159 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { 1160 assert(Len > 0 && "Expected a non-negative length."); 1161 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); 1162 } 1163 // Skip everything after the symbol. 1164 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { 1165 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); 1166 assert(Len > 0 && "Expected a non-negative length."); 1167 AsmRewrites.emplace_back(AOK_Skip, Loc, Len); 1168 } 1169} 1170 1171bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1172 MCAsmParser &Parser = getParser(); 1173 const AsmToken &Tok = Parser.getTok(); 1174 1175 bool Done = false; 1176 while (!Done) { 1177 bool UpdateLocLex = true; 1178 1179 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an 1180 // identifier. Don't try an parse it as a register. 1181 if (Tok.getString().startswith(".")) 1182 break; 1183 1184 // If we're parsing an immediate expression, we don't expect a '['. 1185 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) 1186 break; 1187 1188 AsmToken::TokenKind TK = getLexer().getKind(); 1189 switch (TK) { 1190 default: { 1191 if (SM.isValidEndState()) { 1192 Done = true; 1193 break; 1194 } 1195 return Error(Tok.getLoc(), "unknown token in expression"); 1196 } 1197 case AsmToken::EndOfStatement: { 1198 Done = true; 1199 break; 1200 } 1201 case AsmToken::String: 1202 case AsmToken::Identifier: { 1203 // This could be a register or a symbolic displacement. 1204 unsigned TmpReg; 1205 const MCExpr *Val; 1206 SMLoc IdentLoc = Tok.getLoc(); 1207 StringRef Identifier = Tok.getString(); 1208 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { 1209 SM.onRegister(TmpReg); 1210 UpdateLocLex = false; 1211 break; 1212 } else { 1213 if (!isParsingInlineAsm()) { 1214 if (getParser().parsePrimaryExpr(Val, End)) 1215 return Error(Tok.getLoc(), "Unexpected identifier!"); 1216 } else { 1217 // This is a dot operator, not an adjacent identifier. 1218 if (Identifier.find('.') != StringRef::npos) { 1219 return false; 1220 } else { 1221 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1222 if (ParseIntelIdentifier(Val, Identifier, Info, 1223 /*Unevaluated=*/false, End)) 1224 return true; 1225 } 1226 } 1227 SM.onIdentifierExpr(Val, Identifier); 1228 UpdateLocLex = false; 1229 break; 1230 } 1231 return Error(Tok.getLoc(), "Unexpected identifier!"); 1232 } 1233 case AsmToken::Integer: { 1234 StringRef ErrMsg; 1235 if (isParsingInlineAsm() && SM.getAddImmPrefix()) 1236 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc()); 1237 // Look for 'b' or 'f' following an Integer as a directional label 1238 SMLoc Loc = getTok().getLoc(); 1239 int64_t IntVal = getTok().getIntVal(); 1240 End = consumeToken(); 1241 UpdateLocLex = false; 1242 if (getLexer().getKind() == AsmToken::Identifier) { 1243 StringRef IDVal = getTok().getString(); 1244 if (IDVal == "f" || IDVal == "b") { 1245 MCSymbol *Sym = 1246 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); 1247 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1248 const MCExpr *Val = 1249 MCSymbolRefExpr::create(Sym, Variant, getContext()); 1250 if (IDVal == "b" && Sym->isUndefined()) 1251 return Error(Loc, "invalid reference to undefined symbol"); 1252 StringRef Identifier = Sym->getName(); 1253 SM.onIdentifierExpr(Val, Identifier); 1254 End = consumeToken(); 1255 } else { 1256 if (SM.onInteger(IntVal, ErrMsg)) 1257 return Error(Loc, ErrMsg); 1258 } 1259 } else { 1260 if (SM.onInteger(IntVal, ErrMsg)) 1261 return Error(Loc, ErrMsg); 1262 } 1263 break; 1264 } 1265 case AsmToken::Plus: SM.onPlus(); break; 1266 case AsmToken::Minus: SM.onMinus(); break; 1267 case AsmToken::Tilde: SM.onNot(); break; 1268 case AsmToken::Star: SM.onStar(); break; 1269 case AsmToken::Slash: SM.onDivide(); break; 1270 case AsmToken::Pipe: SM.onOr(); break; 1271 case AsmToken::Caret: SM.onXor(); break; 1272 case AsmToken::Amp: SM.onAnd(); break; 1273 case AsmToken::LessLess: 1274 SM.onLShift(); break; 1275 case AsmToken::GreaterGreater: 1276 SM.onRShift(); break; 1277 case AsmToken::LBrac: SM.onLBrac(); break; 1278 case AsmToken::RBrac: SM.onRBrac(); break; 1279 case AsmToken::LParen: SM.onLParen(); break; 1280 case AsmToken::RParen: SM.onRParen(); break; 1281 } 1282 if (SM.hadError()) 1283 return Error(Tok.getLoc(), "unknown token in expression"); 1284 1285 if (!Done && UpdateLocLex) 1286 End = consumeToken(); 1287 } 1288 return false; 1289} 1290 1291std::unique_ptr<X86Operand> 1292X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 1293 int64_t ImmDisp, unsigned Size) { 1294 MCAsmParser &Parser = getParser(); 1295 const AsmToken &Tok = Parser.getTok(); 1296 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); 1297 if (getLexer().isNot(AsmToken::LBrac)) 1298 return ErrorOperand(BracLoc, "Expected '[' token!"); 1299 Parser.Lex(); // Eat '[' 1300 1301 SMLoc StartInBrac = Tok.getLoc(); 1302 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We 1303 // may have already parsed an immediate displacement before the bracketed 1304 // expression. 1305 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); 1306 if (ParseIntelExpression(SM, End)) 1307 return nullptr; 1308 1309 const MCExpr *Disp = nullptr; 1310 if (const MCExpr *Sym = SM.getSym()) { 1311 // A symbolic displacement. 1312 Disp = Sym; 1313 if (isParsingInlineAsm()) 1314 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(), 1315 ImmDisp, SM.getImm(), BracLoc, StartInBrac, 1316 End); 1317 } 1318 1319 if (SM.getImm() || !Disp) { 1320 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); 1321 if (Disp) 1322 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); 1323 else 1324 Disp = Imm; // An immediate displacement only. 1325 } 1326 1327 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC 1328 // will in fact do global lookup the field name inside all global typedefs, 1329 // but we don't emulate that. 1330 if (Tok.getString().find('.') != StringRef::npos) { 1331 const MCExpr *NewDisp; 1332 if (ParseIntelDotOperator(Disp, NewDisp)) 1333 return nullptr; 1334 1335 End = Tok.getEndLoc(); 1336 Parser.Lex(); // Eat the field. 1337 Disp = NewDisp; 1338 } 1339 1340 int BaseReg = SM.getBaseReg(); 1341 int IndexReg = SM.getIndexReg(); 1342 int Scale = SM.getScale(); 1343 if (!isParsingInlineAsm()) { 1344 // handle [-42] 1345 if (!BaseReg && !IndexReg) { 1346 if (!SegReg) 1347 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); 1348 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 1349 Start, End, Size); 1350 } 1351 StringRef ErrMsg; 1352 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 1353 Error(StartInBrac, ErrMsg); 1354 return nullptr; 1355 } 1356 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 1357 IndexReg, Scale, Start, End, Size); 1358 } 1359 1360 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1361 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1362 End, Size, SM.getSymName(), Info); 1363} 1364 1365// Inline assembly may use variable names with namespace alias qualifiers. 1366bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, 1367 StringRef &Identifier, 1368 InlineAsmIdentifierInfo &Info, 1369 bool IsUnevaluatedOperand, SMLoc &End) { 1370 MCAsmParser &Parser = getParser(); 1371 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); 1372 Val = nullptr; 1373 1374 StringRef LineBuf(Identifier.data()); 1375 void *Result = 1376 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 1377 1378 const AsmToken &Tok = Parser.getTok(); 1379 SMLoc Loc = Tok.getLoc(); 1380 1381 // Advance the token stream until the end of the current token is 1382 // after the end of what the frontend claimed. 1383 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 1384 do { 1385 End = Tok.getEndLoc(); 1386 getLexer().Lex(); 1387 } while (End.getPointer() < EndPtr); 1388 Identifier = LineBuf; 1389 1390 // The frontend should end parsing on an assembler token boundary, unless it 1391 // failed parsing. 1392 assert((End.getPointer() == EndPtr || !Result) && 1393 "frontend claimed part of a token?"); 1394 1395 // If the identifier lookup was unsuccessful, assume that we are dealing with 1396 // a label. 1397 if (!Result) { 1398 StringRef InternalName = 1399 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), 1400 Loc, false); 1401 assert(InternalName.size() && "We should have an internal name here."); 1402 // Push a rewrite for replacing the identifier name with the internal name. 1403 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), 1404 InternalName); 1405 } 1406 1407 // Create the symbol reference. 1408 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); 1409 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1410 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); 1411 return false; 1412} 1413 1414/// \brief Parse intel style segment override. 1415std::unique_ptr<X86Operand> 1416X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, 1417 unsigned Size) { 1418 MCAsmParser &Parser = getParser(); 1419 assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); 1420 const AsmToken &Tok = Parser.getTok(); // Eat colon. 1421 if (Tok.isNot(AsmToken::Colon)) 1422 return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); 1423 Parser.Lex(); // Eat ':' 1424 1425 int64_t ImmDisp = 0; 1426 if (getLexer().is(AsmToken::Integer)) { 1427 ImmDisp = Tok.getIntVal(); 1428 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. 1429 1430 if (isParsingInlineAsm()) 1431 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc()); 1432 1433 if (getLexer().isNot(AsmToken::LBrac)) { 1434 // An immediate following a 'segment register', 'colon' token sequence can 1435 // be followed by a bracketed expression. If it isn't we know we have our 1436 // final segment override. 1437 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext()); 1438 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 1439 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, 1440 Start, ImmDispToken.getEndLoc(), Size); 1441 } 1442 } 1443 1444 if (getLexer().is(AsmToken::LBrac)) 1445 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); 1446 1447 const MCExpr *Val; 1448 SMLoc End; 1449 if (!isParsingInlineAsm()) { 1450 if (getParser().parsePrimaryExpr(Val, End)) 1451 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1452 1453 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size); 1454 } 1455 1456 InlineAsmIdentifierInfo Info; 1457 StringRef Identifier = Tok.getString(); 1458 if (ParseIntelIdentifier(Val, Identifier, Info, 1459 /*Unevaluated=*/false, End)) 1460 return nullptr; 1461 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, 1462 /*Scale=*/1, Start, End, Size, Identifier, Info); 1463} 1464 1465//ParseRoundingModeOp - Parse AVX-512 rounding mode operand 1466std::unique_ptr<X86Operand> 1467X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { 1468 MCAsmParser &Parser = getParser(); 1469 const AsmToken &Tok = Parser.getTok(); 1470 // Eat "{" and mark the current place. 1471 const SMLoc consumedToken = consumeToken(); 1472 if (Tok.getIdentifier().startswith("r")){ 1473 int rndMode = StringSwitch<int>(Tok.getIdentifier()) 1474 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) 1475 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) 1476 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) 1477 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) 1478 .Default(-1); 1479 if (-1 == rndMode) 1480 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode."); 1481 Parser.Lex(); // Eat "r*" of r*-sae 1482 if (!getLexer().is(AsmToken::Minus)) 1483 return ErrorOperand(Tok.getLoc(), "Expected - at this point"); 1484 Parser.Lex(); // Eat "-" 1485 Parser.Lex(); // Eat the sae 1486 if (!getLexer().is(AsmToken::RCurly)) 1487 return ErrorOperand(Tok.getLoc(), "Expected } at this point"); 1488 Parser.Lex(); // Eat "}" 1489 const MCExpr *RndModeOp = 1490 MCConstantExpr::create(rndMode, Parser.getContext()); 1491 return X86Operand::CreateImm(RndModeOp, Start, End); 1492 } 1493 if(Tok.getIdentifier().equals("sae")){ 1494 Parser.Lex(); // Eat the sae 1495 if (!getLexer().is(AsmToken::RCurly)) 1496 return ErrorOperand(Tok.getLoc(), "Expected } at this point"); 1497 Parser.Lex(); // Eat "}" 1498 return X86Operand::CreateToken("{sae}", consumedToken); 1499 } 1500 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1501} 1502/// ParseIntelMemOperand - Parse intel style memory operand. 1503std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, 1504 SMLoc Start, 1505 unsigned Size) { 1506 MCAsmParser &Parser = getParser(); 1507 const AsmToken &Tok = Parser.getTok(); 1508 SMLoc End; 1509 1510 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1511 if (getLexer().is(AsmToken::LBrac)) 1512 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size); 1513 assert(ImmDisp == 0); 1514 1515 const MCExpr *Val; 1516 if (!isParsingInlineAsm()) { 1517 if (getParser().parsePrimaryExpr(Val, End)) 1518 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1519 1520 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size); 1521 } 1522 1523 InlineAsmIdentifierInfo Info; 1524 StringRef Identifier = Tok.getString(); 1525 if (ParseIntelIdentifier(Val, Identifier, Info, 1526 /*Unevaluated=*/false, End)) 1527 return nullptr; 1528 1529 if (!getLexer().is(AsmToken::LBrac)) 1530 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0, 1531 /*Scale=*/1, Start, End, Size, Identifier, Info); 1532 1533 Parser.Lex(); // Eat '[' 1534 1535 // Parse Identifier [ ImmDisp ] 1536 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true, 1537 /*AddImmPrefix=*/false); 1538 if (ParseIntelExpression(SM, End)) 1539 return nullptr; 1540 1541 if (SM.getSym()) { 1542 Error(Start, "cannot use more than one symbol in memory operand"); 1543 return nullptr; 1544 } 1545 if (SM.getBaseReg()) { 1546 Error(Start, "cannot use base register with variable reference"); 1547 return nullptr; 1548 } 1549 if (SM.getIndexReg()) { 1550 Error(Start, "cannot use index register with variable reference"); 1551 return nullptr; 1552 } 1553 1554 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext()); 1555 // BaseReg is non-zero to avoid assertions. In the context of inline asm, 1556 // we're pointing to a local variable in memory, so the base register is 1557 // really the frame or stack pointer. 1558 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1559 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1, 1560 Start, End, Size, Identifier, Info.OpDecl); 1561} 1562 1563/// Parse the '.' operator. 1564bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, 1565 const MCExpr *&NewDisp) { 1566 MCAsmParser &Parser = getParser(); 1567 const AsmToken &Tok = Parser.getTok(); 1568 int64_t OrigDispVal, DotDispVal; 1569 1570 // FIXME: Handle non-constant expressions. 1571 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) 1572 OrigDispVal = OrigDisp->getValue(); 1573 else 1574 return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); 1575 1576 // Drop the optional '.'. 1577 StringRef DotDispStr = Tok.getString(); 1578 if (DotDispStr.startswith(".")) 1579 DotDispStr = DotDispStr.drop_front(1); 1580 1581 // .Imm gets lexed as a real. 1582 if (Tok.is(AsmToken::Real)) { 1583 APInt DotDisp; 1584 DotDispStr.getAsInteger(10, DotDisp); 1585 DotDispVal = DotDisp.getZExtValue(); 1586 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1587 unsigned DotDisp; 1588 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 1589 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, 1590 DotDisp)) 1591 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 1592 DotDispVal = DotDisp; 1593 } else 1594 return Error(Tok.getLoc(), "Unexpected token type!"); 1595 1596 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1597 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); 1598 unsigned Len = DotDispStr.size(); 1599 unsigned Val = OrigDispVal + DotDispVal; 1600 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); 1601 } 1602 1603 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); 1604 return false; 1605} 1606 1607/// Parse the 'offset' operator. This operator is used to specify the 1608/// location rather then the content of a variable. 1609std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { 1610 MCAsmParser &Parser = getParser(); 1611 const AsmToken &Tok = Parser.getTok(); 1612 SMLoc OffsetOfLoc = Tok.getLoc(); 1613 Parser.Lex(); // Eat offset. 1614 1615 const MCExpr *Val; 1616 InlineAsmIdentifierInfo Info; 1617 SMLoc Start = Tok.getLoc(), End; 1618 StringRef Identifier = Tok.getString(); 1619 if (ParseIntelIdentifier(Val, Identifier, Info, 1620 /*Unevaluated=*/false, End)) 1621 return nullptr; 1622 1623 // Don't emit the offset operator. 1624 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); 1625 1626 // The offset operator will have an 'r' constraint, thus we need to create 1627 // register operand to ensure proper matching. Just pick a GPR based on 1628 // the size of a pointer. 1629 unsigned RegNo = 1630 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX); 1631 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, 1632 OffsetOfLoc, Identifier, Info.OpDecl); 1633} 1634 1635enum IntelOperatorKind { 1636 IOK_LENGTH, 1637 IOK_SIZE, 1638 IOK_TYPE 1639}; 1640 1641/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 1642/// returns the number of elements in an array. It returns the value 1 for 1643/// non-array variables. The SIZE operator returns the size of a C or C++ 1644/// variable. A variable's size is the product of its LENGTH and TYPE. The 1645/// TYPE operator returns the size of a C or C++ type or variable. If the 1646/// variable is an array, TYPE returns the size of a single element. 1647std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) { 1648 MCAsmParser &Parser = getParser(); 1649 const AsmToken &Tok = Parser.getTok(); 1650 SMLoc TypeLoc = Tok.getLoc(); 1651 Parser.Lex(); // Eat operator. 1652 1653 const MCExpr *Val = nullptr; 1654 InlineAsmIdentifierInfo Info; 1655 SMLoc Start = Tok.getLoc(), End; 1656 StringRef Identifier = Tok.getString(); 1657 if (ParseIntelIdentifier(Val, Identifier, Info, 1658 /*Unevaluated=*/true, End)) 1659 return nullptr; 1660 1661 if (!Info.OpDecl) 1662 return ErrorOperand(Start, "unable to lookup expression"); 1663 1664 unsigned CVal = 0; 1665 switch(OpKind) { 1666 default: llvm_unreachable("Unexpected operand kind!"); 1667 case IOK_LENGTH: CVal = Info.Length; break; 1668 case IOK_SIZE: CVal = Info.Size; break; 1669 case IOK_TYPE: CVal = Info.Type; break; 1670 } 1671 1672 // Rewrite the type operator and the C or C++ type or variable in terms of an 1673 // immediate. E.g. TYPE foo -> $$4 1674 unsigned Len = End.getPointer() - TypeLoc.getPointer(); 1675 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal); 1676 1677 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); 1678 return X86Operand::CreateImm(Imm, Start, End); 1679} 1680 1681std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { 1682 MCAsmParser &Parser = getParser(); 1683 const AsmToken &Tok = Parser.getTok(); 1684 SMLoc Start, End; 1685 1686 // Offset, length, type and size operators. 1687 if (isParsingInlineAsm()) { 1688 StringRef AsmTokStr = Tok.getString(); 1689 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") 1690 return ParseIntelOffsetOfOperator(); 1691 if (AsmTokStr == "length" || AsmTokStr == "LENGTH") 1692 return ParseIntelOperator(IOK_LENGTH); 1693 if (AsmTokStr == "size" || AsmTokStr == "SIZE") 1694 return ParseIntelOperator(IOK_SIZE); 1695 if (AsmTokStr == "type" || AsmTokStr == "TYPE") 1696 return ParseIntelOperator(IOK_TYPE); 1697 } 1698 1699 bool PtrInOperand = false; 1700 unsigned Size = getIntelMemOperandSize(Tok.getString()); 1701 if (Size) { 1702 Parser.Lex(); // Eat operand size (e.g., byte, word). 1703 if (Tok.getString() != "PTR" && Tok.getString() != "ptr") 1704 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); 1705 Parser.Lex(); // Eat ptr. 1706 PtrInOperand = true; 1707 } 1708 Start = Tok.getLoc(); 1709 1710 // Immediate. 1711 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || 1712 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) { 1713 AsmToken StartTok = Tok; 1714 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, 1715 /*AddImmPrefix=*/false); 1716 if (ParseIntelExpression(SM, End)) 1717 return nullptr; 1718 1719 int64_t Imm = SM.getImm(); 1720 if (isParsingInlineAsm()) { 1721 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); 1722 if (StartTok.getString().size() == Len) 1723 // Just add a prefix if this wasn't a complex immediate expression. 1724 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start); 1725 else 1726 // Otherwise, rewrite the complex expression as a single immediate. 1727 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm); 1728 } 1729 1730 if (getLexer().isNot(AsmToken::LBrac)) { 1731 // If a directional label (ie. 1f or 2b) was parsed above from 1732 // ParseIntelExpression() then SM.getSym() was set to a pointer to 1733 // to the MCExpr with the directional local symbol and this is a 1734 // memory operand not an immediate operand. 1735 if (SM.getSym()) 1736 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End, 1737 Size); 1738 1739 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext()); 1740 return X86Operand::CreateImm(ImmExpr, Start, End); 1741 } 1742 1743 // Only positive immediates are valid. 1744 if (Imm < 0) 1745 return ErrorOperand(Start, "expected a positive immediate displacement " 1746 "before bracketed expr."); 1747 1748 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1749 return ParseIntelMemOperand(Imm, Start, Size); 1750 } 1751 1752 // rounding mode token 1753 if (getSTI().getFeatureBits()[X86::FeatureAVX512] && 1754 getLexer().is(AsmToken::LCurly)) 1755 return ParseRoundingModeOp(Start, End); 1756 1757 // Register. 1758 unsigned RegNo = 0; 1759 if (!ParseRegister(RegNo, Start, End)) { 1760 // If this is a segment register followed by a ':', then this is the start 1761 // of a segment override, otherwise this is a normal register reference. 1762 // In case it is a normal register and there is ptr in the operand this 1763 // is an error 1764 if (getLexer().isNot(AsmToken::Colon)){ 1765 if (PtrInOperand){ 1766 return ErrorOperand(Start, "expected memory operand after " 1767 "'ptr', found register operand instead"); 1768 } 1769 return X86Operand::CreateReg(RegNo, Start, End); 1770 } 1771 1772 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); 1773 } 1774 1775 // Memory operand. 1776 return ParseIntelMemOperand(/*Disp=*/0, Start, Size); 1777} 1778 1779std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { 1780 MCAsmParser &Parser = getParser(); 1781 switch (getLexer().getKind()) { 1782 default: 1783 // Parse a memory operand with no segment register. 1784 return ParseMemOperand(0, Parser.getTok().getLoc()); 1785 case AsmToken::Percent: { 1786 // Read the register. 1787 unsigned RegNo; 1788 SMLoc Start, End; 1789 if (ParseRegister(RegNo, Start, End)) return nullptr; 1790 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 1791 Error(Start, "%eiz and %riz can only be used as index registers", 1792 SMRange(Start, End)); 1793 return nullptr; 1794 } 1795 1796 // If this is a segment register followed by a ':', then this is the start 1797 // of a memory reference, otherwise this is a normal register reference. 1798 if (getLexer().isNot(AsmToken::Colon)) 1799 return X86Operand::CreateReg(RegNo, Start, End); 1800 1801 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo)) 1802 return ErrorOperand(Start, "invalid segment register"); 1803 1804 getParser().Lex(); // Eat the colon. 1805 return ParseMemOperand(RegNo, Start); 1806 } 1807 case AsmToken::Dollar: { 1808 // $42 -> immediate. 1809 SMLoc Start = Parser.getTok().getLoc(), End; 1810 Parser.Lex(); 1811 const MCExpr *Val; 1812 if (getParser().parseExpression(Val, End)) 1813 return nullptr; 1814 return X86Operand::CreateImm(Val, Start, End); 1815 } 1816 case AsmToken::LCurly:{ 1817 SMLoc Start = Parser.getTok().getLoc(), End; 1818 if (getSTI().getFeatureBits()[X86::FeatureAVX512]) 1819 return ParseRoundingModeOp(Start, End); 1820 return ErrorOperand(Start, "unknown token in expression"); 1821 } 1822 } 1823} 1824 1825bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, 1826 const MCParsedAsmOperand &Op) { 1827 MCAsmParser &Parser = getParser(); 1828 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) { 1829 if (getLexer().is(AsmToken::LCurly)) { 1830 // Eat "{" and mark the current place. 1831 const SMLoc consumedToken = consumeToken(); 1832 // Distinguish {1to<NUM>} from {%k<NUM>}. 1833 if(getLexer().is(AsmToken::Integer)) { 1834 // Parse memory broadcasting ({1to<NUM>}). 1835 if (getLexer().getTok().getIntVal() != 1) 1836 return !ErrorAndEatStatement(getLexer().getLoc(), 1837 "Expected 1to<NUM> at this point"); 1838 Parser.Lex(); // Eat "1" of 1to8 1839 if (!getLexer().is(AsmToken::Identifier) || 1840 !getLexer().getTok().getIdentifier().startswith("to")) 1841 return !ErrorAndEatStatement(getLexer().getLoc(), 1842 "Expected 1to<NUM> at this point"); 1843 // Recognize only reasonable suffixes. 1844 const char *BroadcastPrimitive = 1845 StringSwitch<const char*>(getLexer().getTok().getIdentifier()) 1846 .Case("to2", "{1to2}") 1847 .Case("to4", "{1to4}") 1848 .Case("to8", "{1to8}") 1849 .Case("to16", "{1to16}") 1850 .Default(nullptr); 1851 if (!BroadcastPrimitive) 1852 return !ErrorAndEatStatement(getLexer().getLoc(), 1853 "Invalid memory broadcast primitive."); 1854 Parser.Lex(); // Eat "toN" of 1toN 1855 if (!getLexer().is(AsmToken::RCurly)) 1856 return !ErrorAndEatStatement(getLexer().getLoc(), 1857 "Expected } at this point"); 1858 Parser.Lex(); // Eat "}" 1859 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, 1860 consumedToken)); 1861 // No AVX512 specific primitives can pass 1862 // after memory broadcasting, so return. 1863 return true; 1864 } else { 1865 // Parse mask register {%k1} 1866 Operands.push_back(X86Operand::CreateToken("{", consumedToken)); 1867 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 1868 Operands.push_back(std::move(Op)); 1869 if (!getLexer().is(AsmToken::RCurly)) 1870 return !ErrorAndEatStatement(getLexer().getLoc(), 1871 "Expected } at this point"); 1872 Operands.push_back(X86Operand::CreateToken("}", consumeToken())); 1873 1874 // Parse "zeroing non-masked" semantic {z} 1875 if (getLexer().is(AsmToken::LCurly)) { 1876 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken())); 1877 if (!getLexer().is(AsmToken::Identifier) || 1878 getLexer().getTok().getIdentifier() != "z") 1879 return !ErrorAndEatStatement(getLexer().getLoc(), 1880 "Expected z at this point"); 1881 Parser.Lex(); // Eat the z 1882 if (!getLexer().is(AsmToken::RCurly)) 1883 return !ErrorAndEatStatement(getLexer().getLoc(), 1884 "Expected } at this point"); 1885 Parser.Lex(); // Eat the } 1886 } 1887 } 1888 } 1889 } 1890 } 1891 return true; 1892} 1893 1894/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 1895/// has already been parsed if present. 1896std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, 1897 SMLoc MemStart) { 1898 1899 MCAsmParser &Parser = getParser(); 1900 // We have to disambiguate a parenthesized expression "(4+5)" from the start 1901 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 1902 // only way to do this without lookahead is to eat the '(' and see what is 1903 // after it. 1904 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext()); 1905 if (getLexer().isNot(AsmToken::LParen)) { 1906 SMLoc ExprEnd; 1907 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; 1908 1909 // After parsing the base expression we could either have a parenthesized 1910 // memory address or not. If not, return now. If so, eat the (. 1911 if (getLexer().isNot(AsmToken::LParen)) { 1912 // Unless we have a segment register, treat this as an immediate. 1913 if (SegReg == 0) 1914 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd); 1915 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 1916 MemStart, ExprEnd); 1917 } 1918 1919 // Eat the '('. 1920 Parser.Lex(); 1921 } else { 1922 // Okay, we have a '('. We don't know if this is an expression or not, but 1923 // so we have to eat the ( to see beyond it. 1924 SMLoc LParenLoc = Parser.getTok().getLoc(); 1925 Parser.Lex(); // Eat the '('. 1926 1927 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 1928 // Nothing to do here, fall into the code below with the '(' part of the 1929 // memory operand consumed. 1930 } else { 1931 SMLoc ExprEnd; 1932 1933 // It must be an parenthesized expression, parse it now. 1934 if (getParser().parseParenExpression(Disp, ExprEnd)) 1935 return nullptr; 1936 1937 // After parsing the base expression we could either have a parenthesized 1938 // memory address or not. If not, return now. If so, eat the (. 1939 if (getLexer().isNot(AsmToken::LParen)) { 1940 // Unless we have a segment register, treat this as an immediate. 1941 if (SegReg == 0) 1942 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc, 1943 ExprEnd); 1944 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, 1945 MemStart, ExprEnd); 1946 } 1947 1948 // Eat the '('. 1949 Parser.Lex(); 1950 } 1951 } 1952 1953 // If we reached here, then we just ate the ( of the memory operand. Process 1954 // the rest of the memory operand. 1955 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 1956 SMLoc IndexLoc, BaseLoc; 1957 1958 if (getLexer().is(AsmToken::Percent)) { 1959 SMLoc StartLoc, EndLoc; 1960 BaseLoc = Parser.getTok().getLoc(); 1961 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr; 1962 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 1963 Error(StartLoc, "eiz and riz can only be used as index registers", 1964 SMRange(StartLoc, EndLoc)); 1965 return nullptr; 1966 } 1967 } 1968 1969 if (getLexer().is(AsmToken::Comma)) { 1970 Parser.Lex(); // Eat the comma. 1971 IndexLoc = Parser.getTok().getLoc(); 1972 1973 // Following the comma we should have either an index register, or a scale 1974 // value. We don't support the later form, but we want to parse it 1975 // correctly. 1976 // 1977 // Not that even though it would be completely consistent to support syntax 1978 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 1979 if (getLexer().is(AsmToken::Percent)) { 1980 SMLoc L; 1981 if (ParseRegister(IndexReg, L, L)) return nullptr; 1982 1983 if (getLexer().isNot(AsmToken::RParen)) { 1984 // Parse the scale amount: 1985 // ::= ',' [scale-expression] 1986 if (getLexer().isNot(AsmToken::Comma)) { 1987 Error(Parser.getTok().getLoc(), 1988 "expected comma in scale expression"); 1989 return nullptr; 1990 } 1991 Parser.Lex(); // Eat the comma. 1992 1993 if (getLexer().isNot(AsmToken::RParen)) { 1994 SMLoc Loc = Parser.getTok().getLoc(); 1995 1996 int64_t ScaleVal; 1997 if (getParser().parseAbsoluteExpression(ScaleVal)){ 1998 Error(Loc, "expected scale expression"); 1999 return nullptr; 2000 } 2001 2002 // Validate the scale amount. 2003 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 2004 ScaleVal != 1) { 2005 Error(Loc, "scale factor in 16-bit address must be 1"); 2006 return nullptr; 2007 } 2008 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && 2009 ScaleVal != 8) { 2010 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 2011 return nullptr; 2012 } 2013 Scale = (unsigned)ScaleVal; 2014 } 2015 } 2016 } else if (getLexer().isNot(AsmToken::RParen)) { 2017 // A scale amount without an index is ignored. 2018 // index. 2019 SMLoc Loc = Parser.getTok().getLoc(); 2020 2021 int64_t Value; 2022 if (getParser().parseAbsoluteExpression(Value)) 2023 return nullptr; 2024 2025 if (Value != 1) 2026 Warning(Loc, "scale factor without index register is ignored"); 2027 Scale = 1; 2028 } 2029 } 2030 2031 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 2032 if (getLexer().isNot(AsmToken::RParen)) { 2033 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 2034 return nullptr; 2035 } 2036 SMLoc MemEnd = Parser.getTok().getEndLoc(); 2037 Parser.Lex(); // Eat the ')'. 2038 2039 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 2040 // and then only in non-64-bit modes. Except for DX, which is a special case 2041 // because an unofficial form of in/out instructions uses it. 2042 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 2043 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP && 2044 BaseReg != X86::SI && BaseReg != X86::DI)) && 2045 BaseReg != X86::DX) { 2046 Error(BaseLoc, "invalid 16-bit base register"); 2047 return nullptr; 2048 } 2049 if (BaseReg == 0 && 2050 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 2051 Error(IndexLoc, "16-bit memory operand may not include only index register"); 2052 return nullptr; 2053 } 2054 2055 StringRef ErrMsg; 2056 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 2057 Error(BaseLoc, ErrMsg); 2058 return nullptr; 2059 } 2060 2061 if (SegReg || BaseReg || IndexReg) 2062 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, 2063 IndexReg, Scale, MemStart, MemEnd); 2064 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd); 2065} 2066 2067bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 2068 SMLoc NameLoc, OperandVector &Operands) { 2069 MCAsmParser &Parser = getParser(); 2070 InstInfo = &Info; 2071 StringRef PatchedName = Name; 2072 2073 // FIXME: Hack to recognize setneb as setne. 2074 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 2075 PatchedName != "setb" && PatchedName != "setnb") 2076 PatchedName = PatchedName.substr(0, Name.size()-1); 2077 2078 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 2079 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 2080 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 2081 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 2082 bool IsVCMP = PatchedName[0] == 'v'; 2083 unsigned CCIdx = IsVCMP ? 4 : 3; 2084 unsigned ComparisonCode = StringSwitch<unsigned>( 2085 PatchedName.slice(CCIdx, PatchedName.size() - 2)) 2086 .Case("eq", 0x00) 2087 .Case("lt", 0x01) 2088 .Case("le", 0x02) 2089 .Case("unord", 0x03) 2090 .Case("neq", 0x04) 2091 .Case("nlt", 0x05) 2092 .Case("nle", 0x06) 2093 .Case("ord", 0x07) 2094 /* AVX only from here */ 2095 .Case("eq_uq", 0x08) 2096 .Case("nge", 0x09) 2097 .Case("ngt", 0x0A) 2098 .Case("false", 0x0B) 2099 .Case("neq_oq", 0x0C) 2100 .Case("ge", 0x0D) 2101 .Case("gt", 0x0E) 2102 .Case("true", 0x0F) 2103 .Case("eq_os", 0x10) 2104 .Case("lt_oq", 0x11) 2105 .Case("le_oq", 0x12) 2106 .Case("unord_s", 0x13) 2107 .Case("neq_us", 0x14) 2108 .Case("nlt_uq", 0x15) 2109 .Case("nle_uq", 0x16) 2110 .Case("ord_s", 0x17) 2111 .Case("eq_us", 0x18) 2112 .Case("nge_uq", 0x19) 2113 .Case("ngt_uq", 0x1A) 2114 .Case("false_os", 0x1B) 2115 .Case("neq_os", 0x1C) 2116 .Case("ge_oq", 0x1D) 2117 .Case("gt_oq", 0x1E) 2118 .Case("true_us", 0x1F) 2119 .Default(~0U); 2120 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) { 2121 2122 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx), 2123 NameLoc)); 2124 2125 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, 2126 getParser().getContext()); 2127 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2128 2129 PatchedName = PatchedName.substr(PatchedName.size() - 2); 2130 } 2131 } 2132 2133 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2134 if (PatchedName.startswith("vpcmp") && 2135 (PatchedName.endswith("b") || PatchedName.endswith("w") || 2136 PatchedName.endswith("d") || PatchedName.endswith("q"))) { 2137 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2138 unsigned ComparisonCode = StringSwitch<unsigned>( 2139 PatchedName.slice(5, PatchedName.size() - CCIdx)) 2140 .Case("eq", 0x0) // Only allowed on unsigned. Checked below. 2141 .Case("lt", 0x1) 2142 .Case("le", 0x2) 2143 //.Case("false", 0x3) // Not a documented alias. 2144 .Case("neq", 0x4) 2145 .Case("nlt", 0x5) 2146 .Case("nle", 0x6) 2147 //.Case("true", 0x7) // Not a documented alias. 2148 .Default(~0U); 2149 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) { 2150 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc)); 2151 2152 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, 2153 getParser().getContext()); 2154 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2155 2156 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx); 2157 } 2158 } 2159 2160 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2161 if (PatchedName.startswith("vpcom") && 2162 (PatchedName.endswith("b") || PatchedName.endswith("w") || 2163 PatchedName.endswith("d") || PatchedName.endswith("q"))) { 2164 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2165 unsigned ComparisonCode = StringSwitch<unsigned>( 2166 PatchedName.slice(5, PatchedName.size() - CCIdx)) 2167 .Case("lt", 0x0) 2168 .Case("le", 0x1) 2169 .Case("gt", 0x2) 2170 .Case("ge", 0x3) 2171 .Case("eq", 0x4) 2172 .Case("neq", 0x5) 2173 .Case("false", 0x6) 2174 .Case("true", 0x7) 2175 .Default(~0U); 2176 if (ComparisonCode != ~0U) { 2177 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc)); 2178 2179 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, 2180 getParser().getContext()); 2181 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); 2182 2183 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx); 2184 } 2185 } 2186 2187 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 2188 2189 // Determine whether this is an instruction prefix. 2190 bool isPrefix = 2191 Name == "lock" || Name == "rep" || 2192 Name == "repe" || Name == "repz" || 2193 Name == "repne" || Name == "repnz" || 2194 Name == "rex64" || Name == "data16"; 2195 2196 // This does the actual operand parsing. Don't parse any more if we have a 2197 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 2198 // just want to parse the "lock" as the first instruction and the "incl" as 2199 // the next one. 2200 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 2201 2202 // Parse '*' modifier. 2203 if (getLexer().is(AsmToken::Star)) 2204 Operands.push_back(X86Operand::CreateToken("*", consumeToken())); 2205 2206 // Read the operands. 2207 while(1) { 2208 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 2209 Operands.push_back(std::move(Op)); 2210 if (!HandleAVX512Operand(Operands, *Operands.back())) 2211 return true; 2212 } else { 2213 Parser.eatToEndOfStatement(); 2214 return true; 2215 } 2216 // check for comma and eat it 2217 if (getLexer().is(AsmToken::Comma)) 2218 Parser.Lex(); 2219 else 2220 break; 2221 } 2222 2223 if (getLexer().isNot(AsmToken::EndOfStatement)) 2224 return ErrorAndEatStatement(getLexer().getLoc(), 2225 "unexpected token in argument list"); 2226 } 2227 2228 // Consume the EndOfStatement or the prefix separator Slash 2229 if (getLexer().is(AsmToken::EndOfStatement) || 2230 (isPrefix && getLexer().is(AsmToken::Slash))) 2231 Parser.Lex(); 2232 2233 // This is for gas compatibility and cannot be done in td. 2234 // Adding "p" for some floating point with no argument. 2235 // For example: fsub --> fsubp 2236 bool IsFp = 2237 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr"; 2238 if (IsFp && Operands.size() == 1) { 2239 const char *Repl = StringSwitch<const char *>(Name) 2240 .Case("fsub", "fsubp") 2241 .Case("fdiv", "fdivp") 2242 .Case("fsubr", "fsubrp") 2243 .Case("fdivr", "fdivrp"); 2244 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl); 2245 } 2246 2247 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> 2248 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 2249 // documented form in various unofficial manuals, so a lot of code uses it. 2250 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 2251 Operands.size() == 3) { 2252 X86Operand &Op = (X86Operand &)*Operands.back(); 2253 if (Op.isMem() && Op.Mem.SegReg == 0 && 2254 isa<MCConstantExpr>(Op.Mem.Disp) && 2255 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2256 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2257 SMLoc Loc = Op.getEndLoc(); 2258 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2259 } 2260 } 2261 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 2262 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && 2263 Operands.size() == 3) { 2264 X86Operand &Op = (X86Operand &)*Operands[1]; 2265 if (Op.isMem() && Op.Mem.SegReg == 0 && 2266 isa<MCConstantExpr>(Op.Mem.Disp) && 2267 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2268 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2269 SMLoc Loc = Op.getEndLoc(); 2270 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2271 } 2272 } 2273 2274 // Append default arguments to "ins[bwld]" 2275 if (Name.startswith("ins") && Operands.size() == 1 && 2276 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) { 2277 AddDefaultSrcDestOperands(Operands, 2278 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), 2279 DefaultMemDIOperand(NameLoc)); 2280 } 2281 2282 // Append default arguments to "outs[bwld]" 2283 if (Name.startswith("outs") && Operands.size() == 1 && 2284 (Name == "outsb" || Name == "outsw" || Name == "outsl" || 2285 Name == "outsd" )) { 2286 AddDefaultSrcDestOperands(Operands, 2287 DefaultMemSIOperand(NameLoc), 2288 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2289 } 2290 2291 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate 2292 // values of $SIREG according to the mode. It would be nice if this 2293 // could be achieved with InstAlias in the tables. 2294 if (Name.startswith("lods") && Operands.size() == 1 && 2295 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 2296 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) 2297 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2298 2299 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate 2300 // values of $DIREG according to the mode. It would be nice if this 2301 // could be achieved with InstAlias in the tables. 2302 if (Name.startswith("stos") && Operands.size() == 1 && 2303 (Name == "stos" || Name == "stosb" || Name == "stosw" || 2304 Name == "stosl" || Name == "stosd" || Name == "stosq")) 2305 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2306 2307 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate 2308 // values of $DIREG according to the mode. It would be nice if this 2309 // could be achieved with InstAlias in the tables. 2310 if (Name.startswith("scas") && Operands.size() == 1 && 2311 (Name == "scas" || Name == "scasb" || Name == "scasw" || 2312 Name == "scasl" || Name == "scasd" || Name == "scasq")) 2313 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2314 2315 // Add default SI and DI operands to "cmps[bwlq]". 2316 if (Name.startswith("cmps") && 2317 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || 2318 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { 2319 if (Operands.size() == 1) { 2320 AddDefaultSrcDestOperands(Operands, 2321 DefaultMemDIOperand(NameLoc), 2322 DefaultMemSIOperand(NameLoc)); 2323 } else if (Operands.size() == 3) { 2324 X86Operand &Op = (X86Operand &)*Operands[1]; 2325 X86Operand &Op2 = (X86Operand &)*Operands[2]; 2326 if (!doSrcDstMatch(Op, Op2)) 2327 return Error(Op.getStartLoc(), 2328 "mismatching source and destination index registers"); 2329 } 2330 } 2331 2332 // Add default SI and DI operands to "movs[bwlq]". 2333 if ((Name.startswith("movs") && 2334 (Name == "movs" || Name == "movsb" || Name == "movsw" || 2335 Name == "movsl" || Name == "movsd" || Name == "movsq")) || 2336 (Name.startswith("smov") && 2337 (Name == "smov" || Name == "smovb" || Name == "smovw" || 2338 Name == "smovl" || Name == "smovd" || Name == "smovq"))) { 2339 if (Operands.size() == 1) { 2340 if (Name == "movsd") 2341 Operands.back() = X86Operand::CreateToken("movsl", NameLoc); 2342 AddDefaultSrcDestOperands(Operands, 2343 DefaultMemSIOperand(NameLoc), 2344 DefaultMemDIOperand(NameLoc)); 2345 } else if (Operands.size() == 3) { 2346 X86Operand &Op = (X86Operand &)*Operands[1]; 2347 X86Operand &Op2 = (X86Operand &)*Operands[2]; 2348 if (!doSrcDstMatch(Op, Op2)) 2349 return Error(Op.getStartLoc(), 2350 "mismatching source and destination index registers"); 2351 } 2352 } 2353 2354 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 2355 // "shift <op>". 2356 if ((Name.startswith("shr") || Name.startswith("sar") || 2357 Name.startswith("shl") || Name.startswith("sal") || 2358 Name.startswith("rcl") || Name.startswith("rcr") || 2359 Name.startswith("rol") || Name.startswith("ror")) && 2360 Operands.size() == 3) { 2361 if (isParsingIntelSyntax()) { 2362 // Intel syntax 2363 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]); 2364 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2365 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) 2366 Operands.pop_back(); 2367 } else { 2368 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2369 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2370 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) 2371 Operands.erase(Operands.begin() + 1); 2372 } 2373 } 2374 2375 // Transforms "int $3" into "int3" as a size optimization. We can't write an 2376 // instalias with an immediate operand yet. 2377 if (Name == "int" && Operands.size() == 2) { 2378 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2379 if (Op1.isImm()) 2380 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm())) 2381 if (CE->getValue() == 3) { 2382 Operands.erase(Operands.begin() + 1); 2383 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3"); 2384 } 2385 } 2386 2387 return false; 2388} 2389 2390bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { 2391 switch (Inst.getOpcode()) { 2392 default: return false; 2393 case X86::VMOVZPQILo2PQIrr: 2394 case X86::VMOVAPDrr: 2395 case X86::VMOVAPDYrr: 2396 case X86::VMOVAPSrr: 2397 case X86::VMOVAPSYrr: 2398 case X86::VMOVDQArr: 2399 case X86::VMOVDQAYrr: 2400 case X86::VMOVDQUrr: 2401 case X86::VMOVDQUYrr: 2402 case X86::VMOVUPDrr: 2403 case X86::VMOVUPDYrr: 2404 case X86::VMOVUPSrr: 2405 case X86::VMOVUPSYrr: { 2406 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) || 2407 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg())) 2408 return false; 2409 2410 unsigned NewOpc; 2411 switch (Inst.getOpcode()) { 2412 default: llvm_unreachable("Invalid opcode"); 2413 case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; 2414 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; 2415 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; 2416 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; 2417 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; 2418 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; 2419 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; 2420 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; 2421 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; 2422 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; 2423 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; 2424 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; 2425 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; 2426 } 2427 Inst.setOpcode(NewOpc); 2428 return true; 2429 } 2430 case X86::VMOVSDrr: 2431 case X86::VMOVSSrr: { 2432 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) || 2433 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg())) 2434 return false; 2435 unsigned NewOpc; 2436 switch (Inst.getOpcode()) { 2437 default: llvm_unreachable("Invalid opcode"); 2438 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; 2439 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; 2440 } 2441 Inst.setOpcode(NewOpc); 2442 return true; 2443 } 2444 } 2445} 2446 2447static const char *getSubtargetFeatureName(uint64_t Val); 2448 2449void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands, 2450 MCStreamer &Out) { 2451 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(), 2452 MII, Out); 2453} 2454 2455bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2456 OperandVector &Operands, 2457 MCStreamer &Out, uint64_t &ErrorInfo, 2458 bool MatchingInlineAsm) { 2459 if (isParsingIntelSyntax()) 2460 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 2461 MatchingInlineAsm); 2462 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 2463 MatchingInlineAsm); 2464} 2465 2466void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, 2467 OperandVector &Operands, MCStreamer &Out, 2468 bool MatchingInlineAsm) { 2469 // FIXME: This should be replaced with a real .td file alias mechanism. 2470 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 2471 // call. 2472 const char *Repl = StringSwitch<const char *>(Op.getToken()) 2473 .Case("finit", "fninit") 2474 .Case("fsave", "fnsave") 2475 .Case("fstcw", "fnstcw") 2476 .Case("fstcww", "fnstcw") 2477 .Case("fstenv", "fnstenv") 2478 .Case("fstsw", "fnstsw") 2479 .Case("fstsww", "fnstsw") 2480 .Case("fclex", "fnclex") 2481 .Default(nullptr); 2482 if (Repl) { 2483 MCInst Inst; 2484 Inst.setOpcode(X86::WAIT); 2485 Inst.setLoc(IDLoc); 2486 if (!MatchingInlineAsm) 2487 EmitInstruction(Inst, Operands, Out); 2488 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 2489 } 2490} 2491 2492bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, 2493 bool MatchingInlineAsm) { 2494 assert(ErrorInfo && "Unknown missing feature!"); 2495 ArrayRef<SMRange> EmptyRanges = None; 2496 SmallString<126> Msg; 2497 raw_svector_ostream OS(Msg); 2498 OS << "instruction requires:"; 2499 uint64_t Mask = 1; 2500 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { 2501 if (ErrorInfo & Mask) 2502 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask); 2503 Mask <<= 1; 2504 } 2505 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); 2506} 2507 2508bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, 2509 OperandVector &Operands, 2510 MCStreamer &Out, 2511 uint64_t &ErrorInfo, 2512 bool MatchingInlineAsm) { 2513 assert(!Operands.empty() && "Unexpect empty operand list!"); 2514 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 2515 assert(Op.isToken() && "Leading operand should always be a mnemonic!"); 2516 ArrayRef<SMRange> EmptyRanges = None; 2517 2518 // First, handle aliases that expand to multiple instructions. 2519 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); 2520 2521 bool WasOriginallyInvalidOperand = false; 2522 MCInst Inst; 2523 2524 // First, try a direct match. 2525 switch (MatchInstructionImpl(Operands, Inst, 2526 ErrorInfo, MatchingInlineAsm, 2527 isParsingIntelSyntax())) { 2528 default: llvm_unreachable("Unexpected match result!"); 2529 case Match_Success: 2530 // Some instructions need post-processing to, for example, tweak which 2531 // encoding is selected. Loop on it while changes happen so the 2532 // individual transformations can chain off each other. 2533 if (!MatchingInlineAsm) 2534 while (processInstruction(Inst, Operands)) 2535 ; 2536 2537 Inst.setLoc(IDLoc); 2538 if (!MatchingInlineAsm) 2539 EmitInstruction(Inst, Operands, Out); 2540 Opcode = Inst.getOpcode(); 2541 return false; 2542 case Match_MissingFeature: 2543 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm); 2544 case Match_InvalidOperand: 2545 WasOriginallyInvalidOperand = true; 2546 break; 2547 case Match_MnemonicFail: 2548 break; 2549 } 2550 2551 // FIXME: Ideally, we would only attempt suffix matches for things which are 2552 // valid prefixes, and we could just infer the right unambiguous 2553 // type. However, that requires substantially more matcher support than the 2554 // following hack. 2555 2556 // Change the operand to point to a temporary token. 2557 StringRef Base = Op.getToken(); 2558 SmallString<16> Tmp; 2559 Tmp += Base; 2560 Tmp += ' '; 2561 Op.setTokenValue(Tmp); 2562 2563 // If this instruction starts with an 'f', then it is a floating point stack 2564 // instruction. These come in up to three forms for 32-bit, 64-bit, and 2565 // 80-bit floating point, which use the suffixes s,l,t respectively. 2566 // 2567 // Otherwise, we assume that this may be an integer instruction, which comes 2568 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 2569 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 2570 2571 // Check for the various suffix matches. 2572 uint64_t ErrorInfoIgnore; 2573 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings. 2574 unsigned Match[4]; 2575 2576 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) { 2577 Tmp.back() = Suffixes[I]; 2578 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2579 MatchingInlineAsm, isParsingIntelSyntax()); 2580 // If this returned as a missing feature failure, remember that. 2581 if (Match[I] == Match_MissingFeature) 2582 ErrorInfoMissingFeature = ErrorInfoIgnore; 2583 } 2584 2585 // Restore the old token. 2586 Op.setTokenValue(Base); 2587 2588 // If exactly one matched, then we treat that as a successful match (and the 2589 // instruction will already have been filled in correctly, since the failing 2590 // matches won't have modified it). 2591 unsigned NumSuccessfulMatches = 2592 std::count(std::begin(Match), std::end(Match), Match_Success); 2593 if (NumSuccessfulMatches == 1) { 2594 Inst.setLoc(IDLoc); 2595 if (!MatchingInlineAsm) 2596 EmitInstruction(Inst, Operands, Out); 2597 Opcode = Inst.getOpcode(); 2598 return false; 2599 } 2600 2601 // Otherwise, the match failed, try to produce a decent error message. 2602 2603 // If we had multiple suffix matches, then identify this as an ambiguous 2604 // match. 2605 if (NumSuccessfulMatches > 1) { 2606 char MatchChars[4]; 2607 unsigned NumMatches = 0; 2608 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) 2609 if (Match[I] == Match_Success) 2610 MatchChars[NumMatches++] = Suffixes[I]; 2611 2612 SmallString<126> Msg; 2613 raw_svector_ostream OS(Msg); 2614 OS << "ambiguous instructions require an explicit suffix (could be "; 2615 for (unsigned i = 0; i != NumMatches; ++i) { 2616 if (i != 0) 2617 OS << ", "; 2618 if (i + 1 == NumMatches) 2619 OS << "or "; 2620 OS << "'" << Base << MatchChars[i] << "'"; 2621 } 2622 OS << ")"; 2623 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); 2624 return true; 2625 } 2626 2627 // Okay, we know that none of the variants matched successfully. 2628 2629 // If all of the instructions reported an invalid mnemonic, then the original 2630 // mnemonic was invalid. 2631 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) { 2632 if (!WasOriginallyInvalidOperand) { 2633 ArrayRef<SMRange> Ranges = 2634 MatchingInlineAsm ? EmptyRanges : Op.getLocRange(); 2635 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 2636 Ranges, MatchingInlineAsm); 2637 } 2638 2639 // Recover location info for the operand if we know which was the problem. 2640 if (ErrorInfo != ~0ULL) { 2641 if (ErrorInfo >= Operands.size()) 2642 return Error(IDLoc, "too few operands for instruction", 2643 EmptyRanges, MatchingInlineAsm); 2644 2645 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo]; 2646 if (Operand.getStartLoc().isValid()) { 2647 SMRange OperandRange = Operand.getLocRange(); 2648 return Error(Operand.getStartLoc(), "invalid operand for instruction", 2649 OperandRange, MatchingInlineAsm); 2650 } 2651 } 2652 2653 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2654 MatchingInlineAsm); 2655 } 2656 2657 // If one instruction matched with a missing feature, report this as a 2658 // missing feature. 2659 if (std::count(std::begin(Match), std::end(Match), 2660 Match_MissingFeature) == 1) { 2661 ErrorInfo = ErrorInfoMissingFeature; 2662 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature, 2663 MatchingInlineAsm); 2664 } 2665 2666 // If one instruction matched with an invalid operand, report this as an 2667 // operand failure. 2668 if (std::count(std::begin(Match), std::end(Match), 2669 Match_InvalidOperand) == 1) { 2670 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2671 MatchingInlineAsm); 2672 } 2673 2674 // If all of these were an outright failure, report it in a useless way. 2675 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 2676 EmptyRanges, MatchingInlineAsm); 2677 return true; 2678} 2679 2680bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, 2681 OperandVector &Operands, 2682 MCStreamer &Out, 2683 uint64_t &ErrorInfo, 2684 bool MatchingInlineAsm) { 2685 assert(!Operands.empty() && "Unexpect empty operand list!"); 2686 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 2687 assert(Op.isToken() && "Leading operand should always be a mnemonic!"); 2688 StringRef Mnemonic = Op.getToken(); 2689 ArrayRef<SMRange> EmptyRanges = None; 2690 2691 // First, handle aliases that expand to multiple instructions. 2692 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); 2693 2694 MCInst Inst; 2695 2696 // Find one unsized memory operand, if present. 2697 X86Operand *UnsizedMemOp = nullptr; 2698 for (const auto &Op : Operands) { 2699 X86Operand *X86Op = static_cast<X86Operand *>(Op.get()); 2700 if (X86Op->isMemUnsized()) 2701 UnsizedMemOp = X86Op; 2702 } 2703 2704 // Allow some instructions to have implicitly pointer-sized operands. This is 2705 // compatible with gas. 2706 if (UnsizedMemOp) { 2707 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"}; 2708 for (const char *Instr : PtrSizedInstrs) { 2709 if (Mnemonic == Instr) { 2710 UnsizedMemOp->Mem.Size = getPointerWidth(); 2711 break; 2712 } 2713 } 2714 } 2715 2716 // If an unsized memory operand is present, try to match with each memory 2717 // operand size. In Intel assembly, the size is not part of the instruction 2718 // mnemonic. 2719 SmallVector<unsigned, 8> Match; 2720 uint64_t ErrorInfoMissingFeature = 0; 2721 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) { 2722 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512}; 2723 for (unsigned Size : MopSizes) { 2724 UnsizedMemOp->Mem.Size = Size; 2725 uint64_t ErrorInfoIgnore; 2726 unsigned LastOpcode = Inst.getOpcode(); 2727 unsigned M = 2728 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2729 MatchingInlineAsm, isParsingIntelSyntax()); 2730 if (Match.empty() || LastOpcode != Inst.getOpcode()) 2731 Match.push_back(M); 2732 2733 // If this returned as a missing feature failure, remember that. 2734 if (Match.back() == Match_MissingFeature) 2735 ErrorInfoMissingFeature = ErrorInfoIgnore; 2736 } 2737 2738 // Restore the size of the unsized memory operand if we modified it. 2739 if (UnsizedMemOp) 2740 UnsizedMemOp->Mem.Size = 0; 2741 } 2742 2743 // If we haven't matched anything yet, this is not a basic integer or FPU 2744 // operation. There shouldn't be any ambiguity in our mnemonic table, so try 2745 // matching with the unsized operand. 2746 if (Match.empty()) { 2747 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo, 2748 MatchingInlineAsm, 2749 isParsingIntelSyntax())); 2750 // If this returned as a missing feature failure, remember that. 2751 if (Match.back() == Match_MissingFeature) 2752 ErrorInfoMissingFeature = ErrorInfo; 2753 } 2754 2755 // Restore the size of the unsized memory operand if we modified it. 2756 if (UnsizedMemOp) 2757 UnsizedMemOp->Mem.Size = 0; 2758 2759 // If it's a bad mnemonic, all results will be the same. 2760 if (Match.back() == Match_MnemonicFail) { 2761 ArrayRef<SMRange> Ranges = 2762 MatchingInlineAsm ? EmptyRanges : Op.getLocRange(); 2763 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'", 2764 Ranges, MatchingInlineAsm); 2765 } 2766 2767 // If exactly one matched, then we treat that as a successful match (and the 2768 // instruction will already have been filled in correctly, since the failing 2769 // matches won't have modified it). 2770 unsigned NumSuccessfulMatches = 2771 std::count(std::begin(Match), std::end(Match), Match_Success); 2772 if (NumSuccessfulMatches == 1) { 2773 // Some instructions need post-processing to, for example, tweak which 2774 // encoding is selected. Loop on it while changes happen so the individual 2775 // transformations can chain off each other. 2776 if (!MatchingInlineAsm) 2777 while (processInstruction(Inst, Operands)) 2778 ; 2779 Inst.setLoc(IDLoc); 2780 if (!MatchingInlineAsm) 2781 EmitInstruction(Inst, Operands, Out); 2782 Opcode = Inst.getOpcode(); 2783 return false; 2784 } else if (NumSuccessfulMatches > 1) { 2785 assert(UnsizedMemOp && 2786 "multiple matches only possible with unsized memory operands"); 2787 ArrayRef<SMRange> Ranges = 2788 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange(); 2789 return Error(UnsizedMemOp->getStartLoc(), 2790 "ambiguous operand size for instruction '" + Mnemonic + "\'", 2791 Ranges, MatchingInlineAsm); 2792 } 2793 2794 // If one instruction matched with a missing feature, report this as a 2795 // missing feature. 2796 if (std::count(std::begin(Match), std::end(Match), 2797 Match_MissingFeature) == 1) { 2798 ErrorInfo = ErrorInfoMissingFeature; 2799 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature, 2800 MatchingInlineAsm); 2801 } 2802 2803 // If one instruction matched with an invalid operand, report this as an 2804 // operand failure. 2805 if (std::count(std::begin(Match), std::end(Match), 2806 Match_InvalidOperand) == 1) { 2807 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2808 MatchingInlineAsm); 2809 } 2810 2811 // If all of these were an outright failure, report it in a useless way. 2812 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges, 2813 MatchingInlineAsm); 2814} 2815 2816bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) { 2817 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo); 2818} 2819 2820bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 2821 MCAsmParser &Parser = getParser(); 2822 StringRef IDVal = DirectiveID.getIdentifier(); 2823 if (IDVal == ".word") 2824 return ParseDirectiveWord(2, DirectiveID.getLoc()); 2825 else if (IDVal.startswith(".code")) 2826 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 2827 else if (IDVal.startswith(".att_syntax")) { 2828 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2829 if (Parser.getTok().getString() == "prefix") 2830 Parser.Lex(); 2831 else if (Parser.getTok().getString() == "noprefix") 2832 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not " 2833 "supported: registers must have a " 2834 "'%' prefix in .att_syntax"); 2835 } 2836 getParser().setAssemblerDialect(0); 2837 return false; 2838 } else if (IDVal.startswith(".intel_syntax")) { 2839 getParser().setAssemblerDialect(1); 2840 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2841 if (Parser.getTok().getString() == "noprefix") 2842 Parser.Lex(); 2843 else if (Parser.getTok().getString() == "prefix") 2844 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not " 2845 "supported: registers must not have " 2846 "a '%' prefix in .intel_syntax"); 2847 } 2848 return false; 2849 } else if (IDVal == ".even") 2850 return parseDirectiveEven(DirectiveID.getLoc()); 2851 return true; 2852} 2853 2854/// parseDirectiveEven 2855/// ::= .even 2856bool X86AsmParser::parseDirectiveEven(SMLoc L) { 2857 const MCSection *Section = getStreamer().getCurrentSection().first; 2858 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2859 TokError("unexpected token in directive"); 2860 return false; 2861 } 2862 if (!Section) { 2863 getStreamer().InitSections(false); 2864 Section = getStreamer().getCurrentSection().first; 2865 } 2866 if (Section->UseCodeAlign()) 2867 getStreamer().EmitCodeAlignment(2, 0); 2868 else 2869 getStreamer().EmitValueToAlignment(2, 0, 1, 0); 2870 return false; 2871} 2872/// ParseDirectiveWord 2873/// ::= .word [ expression (, expression)* ] 2874bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 2875 MCAsmParser &Parser = getParser(); 2876 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2877 for (;;) { 2878 const MCExpr *Value; 2879 SMLoc ExprLoc = getLexer().getLoc(); 2880 if (getParser().parseExpression(Value)) 2881 return false; 2882 2883 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) { 2884 assert(Size <= 8 && "Invalid size"); 2885 uint64_t IntValue = MCE->getValue(); 2886 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) 2887 return Error(ExprLoc, "literal value out of range for directive"); 2888 getStreamer().EmitIntValue(IntValue, Size); 2889 } else { 2890 getStreamer().EmitValue(Value, Size, ExprLoc); 2891 } 2892 2893 if (getLexer().is(AsmToken::EndOfStatement)) 2894 break; 2895 2896 // FIXME: Improve diagnostic. 2897 if (getLexer().isNot(AsmToken::Comma)) { 2898 Error(L, "unexpected token in directive"); 2899 return false; 2900 } 2901 Parser.Lex(); 2902 } 2903 } 2904 2905 Parser.Lex(); 2906 return false; 2907} 2908 2909/// ParseDirectiveCode 2910/// ::= .code16 | .code32 | .code64 2911bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 2912 MCAsmParser &Parser = getParser(); 2913 if (IDVal == ".code16") { 2914 Parser.Lex(); 2915 if (!is16BitMode()) { 2916 SwitchMode(X86::Mode16Bit); 2917 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 2918 } 2919 } else if (IDVal == ".code32") { 2920 Parser.Lex(); 2921 if (!is32BitMode()) { 2922 SwitchMode(X86::Mode32Bit); 2923 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 2924 } 2925 } else if (IDVal == ".code64") { 2926 Parser.Lex(); 2927 if (!is64BitMode()) { 2928 SwitchMode(X86::Mode64Bit); 2929 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 2930 } 2931 } else { 2932 Error(L, "unknown directive " + IDVal); 2933 return false; 2934 } 2935 2936 return false; 2937} 2938 2939// Force static initialization. 2940extern "C" void LLVMInitializeX86AsmParser() { 2941 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); 2942 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); 2943} 2944 2945#define GET_REGISTER_MATCHER 2946#define GET_MATCHER_IMPLEMENTATION 2947#define GET_SUBTARGET_FEATURE_NAME 2948#include "X86GenAsmMatcher.inc" 2949