X86AsmParser.cpp revision c0c8df3cea0dde2069edd10313a958508f99ec85
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/Target/TargetAsmParser.h" 11#include "X86.h" 12#include "X86Subtarget.h" 13#include "llvm/Target/TargetRegistry.h" 14#include "llvm/Target/TargetAsmParser.h" 15#include "llvm/MC/MCStreamer.h" 16#include "llvm/MC/MCExpr.h" 17#include "llvm/MC/MCInst.h" 18#include "llvm/MC/MCParser/MCAsmLexer.h" 19#include "llvm/MC/MCParser/MCAsmParser.h" 20#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 21#include "llvm/ADT/SmallString.h" 22#include "llvm/ADT/SmallVector.h" 23#include "llvm/ADT/StringExtras.h" 24#include "llvm/ADT/StringSwitch.h" 25#include "llvm/ADT/Twine.h" 26#include "llvm/Support/SourceMgr.h" 27#include "llvm/Support/raw_ostream.h" 28using namespace llvm; 29 30namespace { 31struct X86Operand; 32 33class X86ATTAsmParser : public TargetAsmParser { 34 MCAsmParser &Parser; 35 TargetMachine &TM; 36 37protected: 38 unsigned Is64Bit : 1; 39 40private: 41 MCAsmParser &getParser() const { return Parser; } 42 43 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 44 45 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 46 47 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 48 49 X86Operand *ParseOperand(); 50 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 51 52 bool ParseDirectiveWord(unsigned Size, SMLoc L); 53 54 bool MatchAndEmitInstruction(SMLoc IDLoc, 55 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 56 MCStreamer &Out); 57 58 /// @name Auto-generated Matcher Functions 59 /// { 60 61#define GET_ASSEMBLER_HEADER 62#include "X86GenAsmMatcher.inc" 63 64 /// } 65 66public: 67 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) 68 : TargetAsmParser(T), Parser(_Parser), TM(TM) { 69 70 // Initialize the set of available features. 71 setAvailableFeatures(ComputeAvailableFeatures( 72 &TM.getSubtarget<X86Subtarget>())); 73 } 74 75 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 76 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 77 78 virtual bool ParseDirective(AsmToken DirectiveID); 79}; 80 81class X86_32ATTAsmParser : public X86ATTAsmParser { 82public: 83 X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) 84 : X86ATTAsmParser(T, _Parser, TM) { 85 Is64Bit = false; 86 } 87}; 88 89class X86_64ATTAsmParser : public X86ATTAsmParser { 90public: 91 X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) 92 : X86ATTAsmParser(T, _Parser, TM) { 93 Is64Bit = true; 94 } 95}; 96 97} // end anonymous namespace 98 99/// @name Auto-generated Match Functions 100/// { 101 102static unsigned MatchRegisterName(StringRef Name); 103 104/// } 105 106namespace { 107 108/// X86Operand - Instances of this class represent a parsed X86 machine 109/// instruction. 110struct X86Operand : public MCParsedAsmOperand { 111 enum KindTy { 112 Token, 113 Register, 114 Immediate, 115 Memory 116 } Kind; 117 118 SMLoc StartLoc, EndLoc; 119 120 union { 121 struct { 122 const char *Data; 123 unsigned Length; 124 } Tok; 125 126 struct { 127 unsigned RegNo; 128 } Reg; 129 130 struct { 131 const MCExpr *Val; 132 } Imm; 133 134 struct { 135 unsigned SegReg; 136 const MCExpr *Disp; 137 unsigned BaseReg; 138 unsigned IndexReg; 139 unsigned Scale; 140 } Mem; 141 }; 142 143 X86Operand(KindTy K, SMLoc Start, SMLoc End) 144 : Kind(K), StartLoc(Start), EndLoc(End) {} 145 146 /// getStartLoc - Get the location of the first token of this operand. 147 SMLoc getStartLoc() const { return StartLoc; } 148 /// getEndLoc - Get the location of the last token of this operand. 149 SMLoc getEndLoc() const { return EndLoc; } 150 151 virtual void dump(raw_ostream &OS) const {} 152 153 StringRef getToken() const { 154 assert(Kind == Token && "Invalid access!"); 155 return StringRef(Tok.Data, Tok.Length); 156 } 157 void setTokenValue(StringRef Value) { 158 assert(Kind == Token && "Invalid access!"); 159 Tok.Data = Value.data(); 160 Tok.Length = Value.size(); 161 } 162 163 unsigned getReg() const { 164 assert(Kind == Register && "Invalid access!"); 165 return Reg.RegNo; 166 } 167 168 const MCExpr *getImm() const { 169 assert(Kind == Immediate && "Invalid access!"); 170 return Imm.Val; 171 } 172 173 const MCExpr *getMemDisp() const { 174 assert(Kind == Memory && "Invalid access!"); 175 return Mem.Disp; 176 } 177 unsigned getMemSegReg() const { 178 assert(Kind == Memory && "Invalid access!"); 179 return Mem.SegReg; 180 } 181 unsigned getMemBaseReg() const { 182 assert(Kind == Memory && "Invalid access!"); 183 return Mem.BaseReg; 184 } 185 unsigned getMemIndexReg() const { 186 assert(Kind == Memory && "Invalid access!"); 187 return Mem.IndexReg; 188 } 189 unsigned getMemScale() const { 190 assert(Kind == Memory && "Invalid access!"); 191 return Mem.Scale; 192 } 193 194 bool isToken() const {return Kind == Token; } 195 196 bool isImm() const { return Kind == Immediate; } 197 198 bool isImmSExti16i8() const { 199 if (!isImm()) 200 return false; 201 202 // If this isn't a constant expr, just assume it fits and let relaxation 203 // handle it. 204 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 205 if (!CE) 206 return true; 207 208 // Otherwise, check the value is in a range that makes sense for this 209 // extension. 210 uint64_t Value = CE->getValue(); 211 return (( Value <= 0x000000000000007FULL)|| 212 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| 213 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 214 } 215 bool isImmSExti32i8() const { 216 if (!isImm()) 217 return false; 218 219 // If this isn't a constant expr, just assume it fits and let relaxation 220 // handle it. 221 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 222 if (!CE) 223 return true; 224 225 // Otherwise, check the value is in a range that makes sense for this 226 // extension. 227 uint64_t Value = CE->getValue(); 228 return (( Value <= 0x000000000000007FULL)|| 229 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| 230 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 231 } 232 bool isImmSExti64i8() const { 233 if (!isImm()) 234 return false; 235 236 // If this isn't a constant expr, just assume it fits and let relaxation 237 // handle it. 238 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 239 if (!CE) 240 return true; 241 242 // Otherwise, check the value is in a range that makes sense for this 243 // extension. 244 uint64_t Value = CE->getValue(); 245 return (( Value <= 0x000000000000007FULL)|| 246 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 247 } 248 bool isImmSExti64i32() const { 249 if (!isImm()) 250 return false; 251 252 // If this isn't a constant expr, just assume it fits and let relaxation 253 // handle it. 254 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 255 if (!CE) 256 return true; 257 258 // Otherwise, check the value is in a range that makes sense for this 259 // extension. 260 uint64_t Value = CE->getValue(); 261 return (( Value <= 0x000000007FFFFFFFULL)|| 262 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 263 } 264 265 bool isMem() const { return Kind == Memory; } 266 267 bool isAbsMem() const { 268 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 269 !getMemIndexReg() && getMemScale() == 1; 270 } 271 272 bool isReg() const { return Kind == Register; } 273 274 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 275 // Add as immediates when possible. 276 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 277 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 278 else 279 Inst.addOperand(MCOperand::CreateExpr(Expr)); 280 } 281 282 void addRegOperands(MCInst &Inst, unsigned N) const { 283 assert(N == 1 && "Invalid number of operands!"); 284 Inst.addOperand(MCOperand::CreateReg(getReg())); 285 } 286 287 void addImmOperands(MCInst &Inst, unsigned N) const { 288 assert(N == 1 && "Invalid number of operands!"); 289 addExpr(Inst, getImm()); 290 } 291 292 void addMemOperands(MCInst &Inst, unsigned N) const { 293 assert((N == 5) && "Invalid number of operands!"); 294 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 295 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 296 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 297 addExpr(Inst, getMemDisp()); 298 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 299 } 300 301 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 302 assert((N == 1) && "Invalid number of operands!"); 303 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 304 } 305 306 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 307 X86Operand *Res = new X86Operand(Token, Loc, Loc); 308 Res->Tok.Data = Str.data(); 309 Res->Tok.Length = Str.size(); 310 return Res; 311 } 312 313 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { 314 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 315 Res->Reg.RegNo = RegNo; 316 return Res; 317 } 318 319 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ 320 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 321 Res->Imm.Val = Val; 322 return Res; 323 } 324 325 /// Create an absolute memory operand. 326 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, 327 SMLoc EndLoc) { 328 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 329 Res->Mem.SegReg = 0; 330 Res->Mem.Disp = Disp; 331 Res->Mem.BaseReg = 0; 332 Res->Mem.IndexReg = 0; 333 Res->Mem.Scale = 1; 334 return Res; 335 } 336 337 /// Create a generalized memory operand. 338 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 339 unsigned BaseReg, unsigned IndexReg, 340 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { 341 // We should never just have a displacement, that should be parsed as an 342 // absolute memory operand. 343 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 344 345 // The scale should always be one of {1,2,4,8}. 346 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 347 "Invalid scale!"); 348 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 349 Res->Mem.SegReg = SegReg; 350 Res->Mem.Disp = Disp; 351 Res->Mem.BaseReg = BaseReg; 352 Res->Mem.IndexReg = IndexReg; 353 Res->Mem.Scale = Scale; 354 return Res; 355 } 356}; 357 358} // end anonymous namespace. 359 360 361bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, 362 SMLoc &StartLoc, SMLoc &EndLoc) { 363 RegNo = 0; 364 const AsmToken &TokPercent = Parser.getTok(); 365 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); 366 StartLoc = TokPercent.getLoc(); 367 Parser.Lex(); // Eat percent token. 368 369 const AsmToken &Tok = Parser.getTok(); 370 if (Tok.isNot(AsmToken::Identifier)) 371 return Error(Tok.getLoc(), "invalid register name"); 372 373 // FIXME: Validate register for the current architecture; we have to do 374 // validation later, so maybe there is no need for this here. 375 RegNo = MatchRegisterName(Tok.getString()); 376 377 // If the match failed, try the register name as lowercase. 378 if (RegNo == 0) 379 RegNo = MatchRegisterName(LowercaseString(Tok.getString())); 380 381 // FIXME: This should be done using Requires<In32BitMode> and 382 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions 383 // can be also checked. 384 if (RegNo == X86::RIZ && !Is64Bit) 385 return Error(Tok.getLoc(), "riz register in 64-bit mode only"); 386 387 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 388 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 389 RegNo = X86::ST0; 390 EndLoc = Tok.getLoc(); 391 Parser.Lex(); // Eat 'st' 392 393 // Check to see if we have '(4)' after %st. 394 if (getLexer().isNot(AsmToken::LParen)) 395 return false; 396 // Lex the paren. 397 getParser().Lex(); 398 399 const AsmToken &IntTok = Parser.getTok(); 400 if (IntTok.isNot(AsmToken::Integer)) 401 return Error(IntTok.getLoc(), "expected stack index"); 402 switch (IntTok.getIntVal()) { 403 case 0: RegNo = X86::ST0; break; 404 case 1: RegNo = X86::ST1; break; 405 case 2: RegNo = X86::ST2; break; 406 case 3: RegNo = X86::ST3; break; 407 case 4: RegNo = X86::ST4; break; 408 case 5: RegNo = X86::ST5; break; 409 case 6: RegNo = X86::ST6; break; 410 case 7: RegNo = X86::ST7; break; 411 default: return Error(IntTok.getLoc(), "invalid stack index"); 412 } 413 414 if (getParser().Lex().isNot(AsmToken::RParen)) 415 return Error(Parser.getTok().getLoc(), "expected ')'"); 416 417 EndLoc = Tok.getLoc(); 418 Parser.Lex(); // Eat ')' 419 return false; 420 } 421 422 // If this is "db[0-7]", match it as an alias 423 // for dr[0-7]. 424 if (RegNo == 0 && Tok.getString().size() == 3 && 425 Tok.getString().startswith("db")) { 426 switch (Tok.getString()[2]) { 427 case '0': RegNo = X86::DR0; break; 428 case '1': RegNo = X86::DR1; break; 429 case '2': RegNo = X86::DR2; break; 430 case '3': RegNo = X86::DR3; break; 431 case '4': RegNo = X86::DR4; break; 432 case '5': RegNo = X86::DR5; break; 433 case '6': RegNo = X86::DR6; break; 434 case '7': RegNo = X86::DR7; break; 435 } 436 437 if (RegNo != 0) { 438 EndLoc = Tok.getLoc(); 439 Parser.Lex(); // Eat it. 440 return false; 441 } 442 } 443 444 if (RegNo == 0) 445 return Error(Tok.getLoc(), "invalid register name"); 446 447 EndLoc = Tok.getLoc(); 448 Parser.Lex(); // Eat identifier token. 449 return false; 450} 451 452X86Operand *X86ATTAsmParser::ParseOperand() { 453 switch (getLexer().getKind()) { 454 default: 455 // Parse a memory operand with no segment register. 456 return ParseMemOperand(0, Parser.getTok().getLoc()); 457 case AsmToken::Percent: { 458 // Read the register. 459 unsigned RegNo; 460 SMLoc Start, End; 461 if (ParseRegister(RegNo, Start, End)) return 0; 462 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 463 Error(Start, "eiz and riz can only be used as index registers"); 464 return 0; 465 } 466 467 // If this is a segment register followed by a ':', then this is the start 468 // of a memory reference, otherwise this is a normal register reference. 469 if (getLexer().isNot(AsmToken::Colon)) 470 return X86Operand::CreateReg(RegNo, Start, End); 471 472 473 getParser().Lex(); // Eat the colon. 474 return ParseMemOperand(RegNo, Start); 475 } 476 case AsmToken::Dollar: { 477 // $42 -> immediate. 478 SMLoc Start = Parser.getTok().getLoc(), End; 479 Parser.Lex(); 480 const MCExpr *Val; 481 if (getParser().ParseExpression(Val, End)) 482 return 0; 483 return X86Operand::CreateImm(Val, Start, End); 484 } 485 } 486} 487 488/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 489/// has already been parsed if present. 490X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 491 492 // We have to disambiguate a parenthesized expression "(4+5)" from the start 493 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 494 // only way to do this without lookahead is to eat the '(' and see what is 495 // after it. 496 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 497 if (getLexer().isNot(AsmToken::LParen)) { 498 SMLoc ExprEnd; 499 if (getParser().ParseExpression(Disp, ExprEnd)) return 0; 500 501 // After parsing the base expression we could either have a parenthesized 502 // memory address or not. If not, return now. If so, eat the (. 503 if (getLexer().isNot(AsmToken::LParen)) { 504 // Unless we have a segment register, treat this as an immediate. 505 if (SegReg == 0) 506 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 507 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 508 } 509 510 // Eat the '('. 511 Parser.Lex(); 512 } else { 513 // Okay, we have a '('. We don't know if this is an expression or not, but 514 // so we have to eat the ( to see beyond it. 515 SMLoc LParenLoc = Parser.getTok().getLoc(); 516 Parser.Lex(); // Eat the '('. 517 518 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 519 // Nothing to do here, fall into the code below with the '(' part of the 520 // memory operand consumed. 521 } else { 522 SMLoc ExprEnd; 523 524 // It must be an parenthesized expression, parse it now. 525 if (getParser().ParseParenExpression(Disp, ExprEnd)) 526 return 0; 527 528 // After parsing the base expression we could either have a parenthesized 529 // memory address or not. If not, return now. If so, eat the (. 530 if (getLexer().isNot(AsmToken::LParen)) { 531 // Unless we have a segment register, treat this as an immediate. 532 if (SegReg == 0) 533 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 534 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 535 } 536 537 // Eat the '('. 538 Parser.Lex(); 539 } 540 } 541 542 // If we reached here, then we just ate the ( of the memory operand. Process 543 // the rest of the memory operand. 544 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 545 546 if (getLexer().is(AsmToken::Percent)) { 547 SMLoc L; 548 if (ParseRegister(BaseReg, L, L)) return 0; 549 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 550 Error(L, "eiz and riz can only be used as index registers"); 551 return 0; 552 } 553 } 554 555 if (getLexer().is(AsmToken::Comma)) { 556 Parser.Lex(); // Eat the comma. 557 558 // Following the comma we should have either an index register, or a scale 559 // value. We don't support the later form, but we want to parse it 560 // correctly. 561 // 562 // Not that even though it would be completely consistent to support syntax 563 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 564 if (getLexer().is(AsmToken::Percent)) { 565 SMLoc L; 566 if (ParseRegister(IndexReg, L, L)) return 0; 567 568 if (getLexer().isNot(AsmToken::RParen)) { 569 // Parse the scale amount: 570 // ::= ',' [scale-expression] 571 if (getLexer().isNot(AsmToken::Comma)) { 572 Error(Parser.getTok().getLoc(), 573 "expected comma in scale expression"); 574 return 0; 575 } 576 Parser.Lex(); // Eat the comma. 577 578 if (getLexer().isNot(AsmToken::RParen)) { 579 SMLoc Loc = Parser.getTok().getLoc(); 580 581 int64_t ScaleVal; 582 if (getParser().ParseAbsoluteExpression(ScaleVal)) 583 return 0; 584 585 // Validate the scale amount. 586 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 587 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 588 return 0; 589 } 590 Scale = (unsigned)ScaleVal; 591 } 592 } 593 } else if (getLexer().isNot(AsmToken::RParen)) { 594 // A scale amount without an index is ignored. 595 // index. 596 SMLoc Loc = Parser.getTok().getLoc(); 597 598 int64_t Value; 599 if (getParser().ParseAbsoluteExpression(Value)) 600 return 0; 601 602 if (Value != 1) 603 Warning(Loc, "scale factor without index register is ignored"); 604 Scale = 1; 605 } 606 } 607 608 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 609 if (getLexer().isNot(AsmToken::RParen)) { 610 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 611 return 0; 612 } 613 SMLoc MemEnd = Parser.getTok().getLoc(); 614 Parser.Lex(); // Eat the ')'. 615 616 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 617 MemStart, MemEnd); 618} 619 620bool X86ATTAsmParser:: 621ParseInstruction(StringRef Name, SMLoc NameLoc, 622 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 623 // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to 624 // represent alternative syntaxes in the .td file, without requiring 625 // instruction duplication. 626 StringRef PatchedName = StringSwitch<StringRef>(Name) 627 .Case("sal", "shl") 628 .Case("salb", "shlb") 629 .Case("sall", "shll") 630 .Case("salq", "shlq") 631 .Case("salw", "shlw") 632 .Case("repe", "rep") 633 .Case("repz", "rep") 634 .Case("repnz", "repne") 635 .Case("iret", "iretl") 636 .Case("sysret", "sysretl") 637 .Case("cbw", "cbtw") 638 .Case("cwd", "cwtd") 639 .Case("cdq", "cltd") 640 .Case("cwde", "cwtl") 641 .Case("cdqe", "cltq") 642 .Case("smovb", "movsb") 643 .Case("smovw", "movsw") 644 .Case("smovl", "movsl") 645 .Case("smovq", "movsq") 646 .Case("push", Is64Bit ? "pushq" : "pushl") 647 .Case("pop", Is64Bit ? "popq" : "popl") 648 .Case("pushf", Is64Bit ? "pushfq" : "pushfl") 649 .Case("popf", Is64Bit ? "popfq" : "popfl") 650 .Case("pushfd", "pushfl") 651 .Case("popfd", "popfl") 652 .Case("retl", Is64Bit ? "retl" : "ret") 653 .Case("retq", Is64Bit ? "ret" : "retq") 654 .Case("setz", "sete") .Case("setnz", "setne") 655 .Case("setc", "setb") .Case("setna", "setbe") 656 .Case("setnae", "setb").Case("setnb", "setae") 657 .Case("setnbe", "seta").Case("setnc", "setae") 658 .Case("setng", "setle").Case("setnge", "setl") 659 .Case("setnl", "setge").Case("setnle", "setg") 660 .Case("setpe", "setp") .Case("setpo", "setnp") 661 .Case("jz", "je") .Case("jnz", "jne") 662 .Case("jc", "jb") .Case("jna", "jbe") 663 .Case("jnae", "jb").Case("jnb", "jae") 664 .Case("jnbe", "ja").Case("jnc", "jae") 665 .Case("jng", "jle").Case("jnge", "jl") 666 .Case("jnl", "jge").Case("jnle", "jg") 667 .Case("jpe", "jp") .Case("jpo", "jnp") 668 // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands. 669 .Case("cmovcw", "cmovbw") .Case("cmovcl", "cmovbl") 670 .Case("cmovcq", "cmovbq") .Case("cmovc", "cmovb") 671 .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl") 672 .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb") 673 .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel") 674 .Case("cmovnaq", "cmovbeq").Case("cmovna", "cmovbe") 675 .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael") 676 .Case("cmovnbq", "cmovaeq").Case("cmovnb", "cmovae") 677 .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval") 678 .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova") 679 .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael") 680 .Case("cmovncq", "cmovaeq").Case("cmovnc", "cmovae") 681 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel") 682 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle") 683 .Case("cmovnw", "cmovgew").Case("cmovnl", "cmovgel") 684 .Case("cmovnq", "cmovgeq").Case("cmovn", "cmovge") 685 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel") 686 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle") 687 .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll") 688 .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl") 689 .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel") 690 .Case("cmovnlq", "cmovgeq").Case("cmovnl", "cmovge") 691 .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl") 692 .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg") 693 .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel") 694 .Case("cmovnzq", "cmovneq").Case("cmovnz", "cmovne") 695 .Case("cmovzw", "cmovew") .Case("cmovzl", "cmovel") 696 .Case("cmovzq", "cmoveq") .Case("cmovz", "cmove") 697 // Floating point stack cmov aliases. 698 .Case("fcmovz", "fcmove") 699 .Case("fcmova", "fcmovnbe") 700 .Case("fcmovnae", "fcmovb") 701 .Case("fcmovna", "fcmovbe") 702 .Case("fcmovae", "fcmovnb") 703 .Case("fwait", "wait") 704 .Case("movzx", "movzb") // FIXME: Not correct. 705 .Case("fildq", "fildll") 706 .Default(Name); 707 708 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 709 const MCExpr *ExtraImmOp = 0; 710 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 711 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 712 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 713 bool IsVCMP = PatchedName.startswith("vcmp"); 714 unsigned SSECCIdx = IsVCMP ? 4 : 3; 715 unsigned SSEComparisonCode = StringSwitch<unsigned>( 716 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 717 .Case("eq", 0) 718 .Case("lt", 1) 719 .Case("le", 2) 720 .Case("unord", 3) 721 .Case("neq", 4) 722 .Case("nlt", 5) 723 .Case("nle", 6) 724 .Case("ord", 7) 725 .Case("eq_uq", 8) 726 .Case("nge", 9) 727 .Case("ngt", 0x0A) 728 .Case("false", 0x0B) 729 .Case("neq_oq", 0x0C) 730 .Case("ge", 0x0D) 731 .Case("gt", 0x0E) 732 .Case("true", 0x0F) 733 .Case("eq_os", 0x10) 734 .Case("lt_oq", 0x11) 735 .Case("le_oq", 0x12) 736 .Case("unord_s", 0x13) 737 .Case("neq_us", 0x14) 738 .Case("nlt_uq", 0x15) 739 .Case("nle_uq", 0x16) 740 .Case("ord_s", 0x17) 741 .Case("eq_us", 0x18) 742 .Case("nge_uq", 0x19) 743 .Case("ngt_uq", 0x1A) 744 .Case("false_os", 0x1B) 745 .Case("neq_os", 0x1C) 746 .Case("ge_oq", 0x1D) 747 .Case("gt_oq", 0x1E) 748 .Case("true_us", 0x1F) 749 .Default(~0U); 750 if (SSEComparisonCode != ~0U) { 751 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 752 getParser().getContext()); 753 if (PatchedName.endswith("ss")) { 754 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 755 } else if (PatchedName.endswith("sd")) { 756 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 757 } else if (PatchedName.endswith("ps")) { 758 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 759 } else { 760 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 761 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 762 } 763 } 764 } 765 766 // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq 767 if (PatchedName.startswith("vpclmul")) { 768 unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( 769 PatchedName.slice(7, PatchedName.size() - 2)) 770 .Case("lqlq", 0x00) // src1[63:0], src2[63:0] 771 .Case("hqlq", 0x01) // src1[127:64], src2[63:0] 772 .Case("lqhq", 0x10) // src1[63:0], src2[127:64] 773 .Case("hqhq", 0x11) // src1[127:64], src2[127:64] 774 .Default(~0U); 775 if (CLMULQuadWordSelect != ~0U) { 776 ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, 777 getParser().getContext()); 778 assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); 779 PatchedName = "vpclmulqdq"; 780 } 781 } 782 783 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 784 785 if (ExtraImmOp) 786 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 787 788 789 // Determine whether this is an instruction prefix. 790 bool isPrefix = 791 PatchedName == "lock" || PatchedName == "rep" || 792 PatchedName == "repne"; 793 794 795 // This does the actual operand parsing. Don't parse any more if we have a 796 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 797 // just want to parse the "lock" as the first instruction and the "incl" as 798 // the next one. 799 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 800 801 // Parse '*' modifier. 802 if (getLexer().is(AsmToken::Star)) { 803 SMLoc Loc = Parser.getTok().getLoc(); 804 Operands.push_back(X86Operand::CreateToken("*", Loc)); 805 Parser.Lex(); // Eat the star. 806 } 807 808 // Read the first operand. 809 if (X86Operand *Op = ParseOperand()) 810 Operands.push_back(Op); 811 else { 812 Parser.EatToEndOfStatement(); 813 return true; 814 } 815 816 while (getLexer().is(AsmToken::Comma)) { 817 Parser.Lex(); // Eat the comma. 818 819 // Parse and remember the operand. 820 if (X86Operand *Op = ParseOperand()) 821 Operands.push_back(Op); 822 else { 823 Parser.EatToEndOfStatement(); 824 return true; 825 } 826 } 827 828 if (getLexer().isNot(AsmToken::EndOfStatement)) { 829 Parser.EatToEndOfStatement(); 830 return TokError("unexpected token in argument list"); 831 } 832 } 833 834 if (getLexer().is(AsmToken::EndOfStatement)) 835 Parser.Lex(); // Consume the EndOfStatement 836 837 // Hack to allow 'movq <largeimm>, <reg>' as an alias for movabsq. 838 if ((Name == "movq" || Name == "mov") && Operands.size() == 3 && 839 static_cast<X86Operand*>(Operands[2])->isReg() && 840 static_cast<X86Operand*>(Operands[1])->isImm() && 841 !static_cast<X86Operand*>(Operands[1])->isImmSExti64i32()) { 842 delete Operands[0]; 843 Operands[0] = X86Operand::CreateToken("movabsq", NameLoc); 844 } 845 846 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 847 // "shift <op>". 848 if ((Name.startswith("shr") || Name.startswith("sar") || 849 Name.startswith("shl")) && 850 Operands.size() == 3) { 851 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 852 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 853 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 854 delete Operands[1]; 855 Operands.erase(Operands.begin() + 1); 856 } 857 } 858 859 // FIXME: Hack to handle recognize "rc[lr] <op>" -> "rcl $1, <op>". 860 if ((Name.startswith("rcl") || Name.startswith("rcr")) && 861 Operands.size() == 2) { 862 const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext()); 863 Operands.push_back(X86Operand::CreateImm(One, NameLoc, NameLoc)); 864 std::swap(Operands[1], Operands[2]); 865 } 866 867 // FIXME: Hack to handle recognize "sh[lr]d op,op" -> "shld $1, op,op". 868 if ((Name.startswith("shld") || Name.startswith("shrd")) && 869 Operands.size() == 3) { 870 const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext()); 871 Operands.insert(Operands.begin()+1, 872 X86Operand::CreateImm(One, NameLoc, NameLoc)); 873 } 874 875 876 // FIXME: Hack to handle recognize "in[bwl] <op>". Canonicalize it to 877 // "inb <op>, %al". 878 if ((Name == "inb" || Name == "inw" || Name == "inl") && 879 Operands.size() == 2) { 880 unsigned Reg; 881 if (Name[2] == 'b') 882 Reg = MatchRegisterName("al"); 883 else if (Name[2] == 'w') 884 Reg = MatchRegisterName("ax"); 885 else 886 Reg = MatchRegisterName("eax"); 887 SMLoc Loc = Operands.back()->getEndLoc(); 888 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc)); 889 } 890 891 // FIXME: Hack to handle recognize "out[bwl] <op>". Canonicalize it to 892 // "outb %al, <op>". 893 if ((Name == "outb" || Name == "outw" || Name == "outl") && 894 Operands.size() == 2) { 895 unsigned Reg; 896 if (Name[3] == 'b') 897 Reg = MatchRegisterName("al"); 898 else if (Name[3] == 'w') 899 Reg = MatchRegisterName("ax"); 900 else 901 Reg = MatchRegisterName("eax"); 902 SMLoc Loc = Operands.back()->getEndLoc(); 903 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc)); 904 std::swap(Operands[1], Operands[2]); 905 } 906 907 // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". 908 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 909 Operands.size() == 3) { 910 X86Operand &Op = *(X86Operand*)Operands.back(); 911 if (Op.isMem() && Op.Mem.SegReg == 0 && 912 isa<MCConstantExpr>(Op.Mem.Disp) && 913 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 914 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 915 SMLoc Loc = Op.getEndLoc(); 916 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 917 delete &Op; 918 } 919 } 920 921 // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as 922 // "f{mul*,add*,sub*,div*} $op" 923 if ((Name.startswith("fmul") || Name.startswith("fadd") || 924 Name.startswith("fsub") || Name.startswith("fdiv")) && 925 Operands.size() == 3 && 926 static_cast<X86Operand*>(Operands[2])->isReg() && 927 static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { 928 delete Operands[2]; 929 Operands.erase(Operands.begin() + 2); 930 } 931 932 // FIXME: Hack to handle "f{mulp,addp} st(0), $op" the same as 933 // "f{mulp,addp} $op", since they commute. We also allow fdivrp/fsubrp even 934 // though they don't commute, solely because gas does support this. 935 if ((Name=="fmulp" || Name=="faddp" || Name=="fsubrp" || Name=="fdivrp") && 936 Operands.size() == 3 && 937 static_cast<X86Operand*>(Operands[1])->isReg() && 938 static_cast<X86Operand*>(Operands[1])->getReg() == X86::ST0) { 939 delete Operands[1]; 940 Operands.erase(Operands.begin() + 1); 941 } 942 943 // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B, 944 // B". 945 if (Name.startswith("imul") && Operands.size() == 3 && 946 static_cast<X86Operand*>(Operands[1])->isImm() && 947 static_cast<X86Operand*>(Operands.back())->isReg()) { 948 X86Operand *Op = static_cast<X86Operand*>(Operands.back()); 949 Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), 950 Op->getEndLoc())); 951 } 952 953 // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same 954 // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity 955 // errors, since its encoding is the most compact. 956 if (Name == "sldt" && Operands.size() == 2 && 957 static_cast<X86Operand*>(Operands[1])->isMem()) { 958 delete Operands[0]; 959 Operands[0] = X86Operand::CreateToken("sldtw", NameLoc); 960 } 961 962 // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as 963 // synonyms. Our tables only have the "<reg>, <mem>" form, so if we see the 964 // other operand order, swap them. 965 if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"|| 966 Name == "xchg") 967 if (Operands.size() == 3 && 968 static_cast<X86Operand*>(Operands[1])->isMem() && 969 static_cast<X86Operand*>(Operands[2])->isReg()) { 970 std::swap(Operands[1], Operands[2]); 971 } 972 973 // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as 974 // synonyms. Our tables only have the "<mem>, <reg>" form, so if we see the 975 // other operand order, swap them. 976 if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"|| 977 Name == "test") 978 if (Operands.size() == 3 && 979 static_cast<X86Operand*>(Operands[1])->isReg() && 980 static_cast<X86Operand*>(Operands[2])->isMem()) { 981 std::swap(Operands[1], Operands[2]); 982 } 983 984 // The assembler accepts these instructions with no operand as a synonym for 985 // an instruction acting on st(1). e.g. "fxch" -> "fxch %st(1)". 986 if ((Name == "fxch" || Name == "fucom" || Name == "fucomp" || 987 Name == "faddp" || Name == "fsubp" || Name == "fsubrp" || 988 Name == "fmulp" || Name == "fdivp" || Name == "fdivrp") && 989 Operands.size() == 1) { 990 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"), 991 NameLoc, NameLoc)); 992 } 993 994 // The assembler accepts these instructions with two few operands as a synonym 995 // for taking %st(1),%st(0) or X, %st(0). 996 if ((Name == "fcomi" || Name == "fucomi") && Operands.size() < 3) { 997 if (Operands.size() == 1) 998 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"), 999 NameLoc, NameLoc)); 1000 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(0)"), 1001 NameLoc, NameLoc)); 1002 } 1003 1004 // The assembler accepts various amounts of brokenness for fnstsw. 1005 if (Name == "fnstsw") { 1006 if (Operands.size() == 2 && 1007 static_cast<X86Operand*>(Operands[1])->isReg()) { 1008 // "fnstsw al" and "fnstsw eax" -> "fnstw" 1009 unsigned Reg = static_cast<X86Operand*>(Operands[1])->Reg.RegNo; 1010 if (Reg == MatchRegisterName("eax") || 1011 Reg == MatchRegisterName("al")) { 1012 delete Operands[1]; 1013 Operands.pop_back(); 1014 } 1015 } 1016 1017 // "fnstw" -> "fnstw %ax" 1018 if (Operands.size() == 1) 1019 Operands.push_back(X86Operand::CreateReg(MatchRegisterName("ax"), 1020 NameLoc, NameLoc)); 1021 } 1022 1023 // jmp $42,$5 -> ljmp, similarly for call. 1024 if ((Name.startswith("call") || Name.startswith("jmp")) && 1025 Operands.size() == 3 && 1026 static_cast<X86Operand*>(Operands[1])->isImm() && 1027 static_cast<X86Operand*>(Operands[2])->isImm()) { 1028 const char *NewOpName = StringSwitch<const char *>(Name) 1029 .Case("jmp", "ljmp") 1030 .Case("jmpw", "ljmpw") 1031 .Case("jmpl", "ljmpl") 1032 .Case("jmpq", "ljmpq") 1033 .Case("call", "lcall") 1034 .Case("callw", "lcallw") 1035 .Case("calll", "lcalll") 1036 .Case("callq", "lcallq") 1037 .Default(0); 1038 if (NewOpName) { 1039 delete Operands[0]; 1040 Operands[0] = X86Operand::CreateToken(NewOpName, NameLoc); 1041 Name = NewOpName; 1042 } 1043 } 1044 1045 // lcall and ljmp -> lcalll and ljmpl 1046 if ((Name == "lcall" || Name == "ljmp") && Operands.size() == 3) { 1047 delete Operands[0]; 1048 Operands[0] = X86Operand::CreateToken(Name == "lcall" ? "lcalll" : "ljmpl", 1049 NameLoc); 1050 } 1051 1052 // call foo is not ambiguous with callw. 1053 if (Name == "call" && Operands.size() == 2) { 1054 const char *NewName = Is64Bit ? "callq" : "calll"; 1055 delete Operands[0]; 1056 Operands[0] = X86Operand::CreateToken(NewName, NameLoc); 1057 Name = NewName; 1058 } 1059 1060 // movsd -> movsl (when no operands are specified). 1061 if (Name == "movsd" && Operands.size() == 1) { 1062 delete Operands[0]; 1063 Operands[0] = X86Operand::CreateToken("movsl", NameLoc); 1064 } 1065 1066 // fstp <mem> -> fstps <mem>. Without this, we'll default to fstpl due to 1067 // suffix searching. 1068 if (Name == "fstp" && Operands.size() == 2 && 1069 static_cast<X86Operand*>(Operands[1])->isMem()) { 1070 delete Operands[0]; 1071 Operands[0] = X86Operand::CreateToken("fstps", NameLoc); 1072 } 1073 1074 1075 // "clr <reg>" -> "xor <reg>, <reg>". 1076 if ((Name == "clrb" || Name == "clrw" || Name == "clrl" || Name == "clrq" || 1077 Name == "clr") && Operands.size() == 2 && 1078 static_cast<X86Operand*>(Operands[1])->isReg()) { 1079 unsigned RegNo = static_cast<X86Operand*>(Operands[1])->getReg(); 1080 Operands.push_back(X86Operand::CreateReg(RegNo, NameLoc, NameLoc)); 1081 delete Operands[0]; 1082 Operands[0] = X86Operand::CreateToken("xor", NameLoc); 1083 } 1084 1085 return false; 1086} 1087 1088bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { 1089 StringRef IDVal = DirectiveID.getIdentifier(); 1090 if (IDVal == ".word") 1091 return ParseDirectiveWord(2, DirectiveID.getLoc()); 1092 return true; 1093} 1094 1095/// ParseDirectiveWord 1096/// ::= .word [ expression (, expression)* ] 1097bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 1098 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1099 for (;;) { 1100 const MCExpr *Value; 1101 if (getParser().ParseExpression(Value)) 1102 return true; 1103 1104 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 1105 1106 if (getLexer().is(AsmToken::EndOfStatement)) 1107 break; 1108 1109 // FIXME: Improve diagnostic. 1110 if (getLexer().isNot(AsmToken::Comma)) 1111 return Error(L, "unexpected token in directive"); 1112 Parser.Lex(); 1113 } 1114 } 1115 1116 Parser.Lex(); 1117 return false; 1118} 1119 1120 1121bool X86ATTAsmParser:: 1122MatchAndEmitInstruction(SMLoc IDLoc, 1123 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 1124 MCStreamer &Out) { 1125 assert(!Operands.empty() && "Unexpect empty operand list!"); 1126 X86Operand *Op = static_cast<X86Operand*>(Operands[0]); 1127 assert(Op->isToken() && "Leading operand should always be a mnemonic!"); 1128 1129 // First, handle aliases that expand to multiple instructions. 1130 // FIXME: This should be replaced with a real .td file alias mechanism. 1131 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || 1132 Op->getToken() == "finit" || Op->getToken() == "fsave" || 1133 Op->getToken() == "fstenv") { 1134 MCInst Inst; 1135 Inst.setOpcode(X86::WAIT); 1136 Out.EmitInstruction(Inst); 1137 1138 const char *Repl = 1139 StringSwitch<const char*>(Op->getToken()) 1140 .Case("finit", "fninit") 1141 .Case("fsave", "fnsave") 1142 .Case("fstcw", "fnstcw") 1143 .Case("fstenv", "fnstenv") 1144 .Case("fstsw", "fnstsw") 1145 .Default(0); 1146 assert(Repl && "Unknown wait-prefixed instruction"); 1147 delete Operands[0]; 1148 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 1149 } 1150 1151 bool WasOriginallyInvalidOperand = false; 1152 unsigned OrigErrorInfo; 1153 MCInst Inst; 1154 1155 // First, try a direct match. 1156 switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { 1157 case Match_Success: 1158 Out.EmitInstruction(Inst); 1159 return false; 1160 case Match_MissingFeature: 1161 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1162 return true; 1163 case Match_InvalidOperand: 1164 WasOriginallyInvalidOperand = true; 1165 break; 1166 case Match_MnemonicFail: 1167 break; 1168 } 1169 1170 // FIXME: Ideally, we would only attempt suffix matches for things which are 1171 // valid prefixes, and we could just infer the right unambiguous 1172 // type. However, that requires substantially more matcher support than the 1173 // following hack. 1174 1175 // Change the operand to point to a temporary token. 1176 StringRef Base = Op->getToken(); 1177 SmallString<16> Tmp; 1178 Tmp += Base; 1179 Tmp += ' '; 1180 Op->setTokenValue(Tmp.str()); 1181 1182 // Check for the various suffix matches. 1183 Tmp[Base.size()] = 'b'; 1184 unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo; 1185 MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo); 1186 Tmp[Base.size()] = 'w'; 1187 MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo); 1188 Tmp[Base.size()] = 'l'; 1189 MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo); 1190 Tmp[Base.size()] = 'q'; 1191 MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo); 1192 1193 // Restore the old token. 1194 Op->setTokenValue(Base); 1195 1196 // If exactly one matched, then we treat that as a successful match (and the 1197 // instruction will already have been filled in correctly, since the failing 1198 // matches won't have modified it). 1199 unsigned NumSuccessfulMatches = 1200 (MatchB == Match_Success) + (MatchW == Match_Success) + 1201 (MatchL == Match_Success) + (MatchQ == Match_Success); 1202 if (NumSuccessfulMatches == 1) { 1203 Out.EmitInstruction(Inst); 1204 return false; 1205 } 1206 1207 // Otherwise, the match failed, try to produce a decent error message. 1208 1209 // If we had multiple suffix matches, then identify this as an ambiguous 1210 // match. 1211 if (NumSuccessfulMatches > 1) { 1212 char MatchChars[4]; 1213 unsigned NumMatches = 0; 1214 if (MatchB == Match_Success) 1215 MatchChars[NumMatches++] = 'b'; 1216 if (MatchW == Match_Success) 1217 MatchChars[NumMatches++] = 'w'; 1218 if (MatchL == Match_Success) 1219 MatchChars[NumMatches++] = 'l'; 1220 if (MatchQ == Match_Success) 1221 MatchChars[NumMatches++] = 'q'; 1222 1223 SmallString<126> Msg; 1224 raw_svector_ostream OS(Msg); 1225 OS << "ambiguous instructions require an explicit suffix (could be "; 1226 for (unsigned i = 0; i != NumMatches; ++i) { 1227 if (i != 0) 1228 OS << ", "; 1229 if (i + 1 == NumMatches) 1230 OS << "or "; 1231 OS << "'" << Base << MatchChars[i] << "'"; 1232 } 1233 OS << ")"; 1234 Error(IDLoc, OS.str()); 1235 return true; 1236 } 1237 1238 // Okay, we know that none of the variants matched successfully. 1239 1240 // If all of the instructions reported an invalid mnemonic, then the original 1241 // mnemonic was invalid. 1242 if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) && 1243 (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) { 1244 if (!WasOriginallyInvalidOperand) { 1245 Error(IDLoc, "invalid instruction mnemonic '" + Base + "'"); 1246 return true; 1247 } 1248 1249 // Recover location info for the operand if we know which was the problem. 1250 SMLoc ErrorLoc = IDLoc; 1251 if (OrigErrorInfo != ~0U) { 1252 if (OrigErrorInfo >= Operands.size()) 1253 return Error(IDLoc, "too few operands for instruction"); 1254 1255 ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc(); 1256 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 1257 } 1258 1259 return Error(ErrorLoc, "invalid operand for instruction"); 1260 } 1261 1262 // If one instruction matched with a missing feature, report this as a 1263 // missing feature. 1264 if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) + 1265 (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){ 1266 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1267 return true; 1268 } 1269 1270 // If one instruction matched with an invalid operand, report this as an 1271 // operand failure. 1272 if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) + 1273 (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){ 1274 Error(IDLoc, "invalid operand for instruction"); 1275 return true; 1276 } 1277 1278 // If all of these were an outright failure, report it in a useless way. 1279 // FIXME: We should give nicer diagnostics about the exact failure. 1280 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); 1281 return true; 1282} 1283 1284 1285extern "C" void LLVMInitializeX86AsmLexer(); 1286 1287// Force static initialization. 1288extern "C" void LLVMInitializeX86AsmParser() { 1289 RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); 1290 RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); 1291 LLVMInitializeX86AsmLexer(); 1292} 1293 1294#define GET_REGISTER_MATCHER 1295#define GET_MATCHER_IMPLEMENTATION 1296#include "X86GenAsmMatcher.inc" 1297