// X86AsmParser.cpp revision ef63c9a9b6f79fef91dc144db9d5f217d2b83a95
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/Target/TargetAsmParser.h" 11#include "X86.h" 12#include "X86Subtarget.h" 13#include "llvm/ADT/SmallString.h" 14#include "llvm/ADT/SmallVector.h" 15#include "llvm/ADT/StringSwitch.h" 16#include "llvm/ADT/Twine.h" 17#include "llvm/MC/MCStreamer.h" 18#include "llvm/MC/MCExpr.h" 19#include "llvm/MC/MCInst.h" 20#include "llvm/MC/MCParser/MCAsmLexer.h" 21#include "llvm/MC/MCParser/MCAsmParser.h" 22#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 23#include "llvm/Support/SourceMgr.h" 24#include "llvm/Support/raw_ostream.h" 25#include "llvm/Target/TargetRegistry.h" 26#include "llvm/Target/TargetAsmParser.h" 27using namespace llvm; 28 29namespace { 30struct X86Operand; 31 32class X86ATTAsmParser : public TargetAsmParser { 33 MCAsmParser &Parser; 34 TargetMachine &TM; 35 36protected: 37 unsigned Is64Bit : 1; 38 39private: 40 MCAsmParser &getParser() const { return Parser; } 41 42 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 43 44 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 45 46 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 47 48 X86Operand *ParseOperand(); 49 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 50 51 bool ParseDirectiveWord(unsigned Size, SMLoc L); 52 53 bool MatchInstruction(SMLoc IDLoc, 54 const SmallVectorImpl<MCParsedAsmOperand*> &Operands, 55 MCInst &Inst); 56 57 /// @name Auto-generated Matcher Functions 58 /// { 59 60#define GET_ASSEMBLER_HEADER 61#include "X86GenAsmMatcher.inc" 62 63 /// } 64 65public: 66 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) 67 : TargetAsmParser(T), Parser(_Parser), TM(TM) { 68 69 // 
Initialize the set of available features. 70 setAvailableFeatures(ComputeAvailableFeatures( 71 &TM.getSubtarget<X86Subtarget>())); 72 } 73 74 virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 75 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 76 77 virtual bool ParseDirective(AsmToken DirectiveID); 78}; 79 80class X86_32ATTAsmParser : public X86ATTAsmParser { 81public: 82 X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) 83 : X86ATTAsmParser(T, _Parser, TM) { 84 Is64Bit = false; 85 } 86}; 87 88class X86_64ATTAsmParser : public X86ATTAsmParser { 89public: 90 X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) 91 : X86ATTAsmParser(T, _Parser, TM) { 92 Is64Bit = true; 93 } 94}; 95 96} // end anonymous namespace 97 98/// @name Auto-generated Match Functions 99/// { 100 101static unsigned MatchRegisterName(StringRef Name); 102 103/// } 104 105namespace { 106 107/// X86Operand - Instances of this class represent a parsed X86 machine 108/// instruction. 109struct X86Operand : public MCParsedAsmOperand { 110 enum KindTy { 111 Token, 112 Register, 113 Immediate, 114 Memory 115 } Kind; 116 117 SMLoc StartLoc, EndLoc; 118 119 union { 120 struct { 121 const char *Data; 122 unsigned Length; 123 } Tok; 124 125 struct { 126 unsigned RegNo; 127 } Reg; 128 129 struct { 130 const MCExpr *Val; 131 } Imm; 132 133 struct { 134 unsigned SegReg; 135 const MCExpr *Disp; 136 unsigned BaseReg; 137 unsigned IndexReg; 138 unsigned Scale; 139 } Mem; 140 }; 141 142 X86Operand(KindTy K, SMLoc Start, SMLoc End) 143 : Kind(K), StartLoc(Start), EndLoc(End) {} 144 145 /// getStartLoc - Get the location of the first token of this operand. 146 SMLoc getStartLoc() const { return StartLoc; } 147 /// getEndLoc - Get the location of the last token of this operand. 
148 SMLoc getEndLoc() const { return EndLoc; } 149 150 virtual void dump(raw_ostream &OS) const {} 151 152 StringRef getToken() const { 153 assert(Kind == Token && "Invalid access!"); 154 return StringRef(Tok.Data, Tok.Length); 155 } 156 void setTokenValue(StringRef Value) { 157 assert(Kind == Token && "Invalid access!"); 158 Tok.Data = Value.data(); 159 Tok.Length = Value.size(); 160 } 161 162 unsigned getReg() const { 163 assert(Kind == Register && "Invalid access!"); 164 return Reg.RegNo; 165 } 166 167 const MCExpr *getImm() const { 168 assert(Kind == Immediate && "Invalid access!"); 169 return Imm.Val; 170 } 171 172 const MCExpr *getMemDisp() const { 173 assert(Kind == Memory && "Invalid access!"); 174 return Mem.Disp; 175 } 176 unsigned getMemSegReg() const { 177 assert(Kind == Memory && "Invalid access!"); 178 return Mem.SegReg; 179 } 180 unsigned getMemBaseReg() const { 181 assert(Kind == Memory && "Invalid access!"); 182 return Mem.BaseReg; 183 } 184 unsigned getMemIndexReg() const { 185 assert(Kind == Memory && "Invalid access!"); 186 return Mem.IndexReg; 187 } 188 unsigned getMemScale() const { 189 assert(Kind == Memory && "Invalid access!"); 190 return Mem.Scale; 191 } 192 193 bool isToken() const {return Kind == Token; } 194 195 bool isImm() const { return Kind == Immediate; } 196 197 bool isImmSExti16i8() const { 198 if (!isImm()) 199 return false; 200 201 // If this isn't a constant expr, just assume it fits and let relaxation 202 // handle it. 203 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 204 if (!CE) 205 return true; 206 207 // Otherwise, check the value is in a range that makes sense for this 208 // extension. 
209 uint64_t Value = CE->getValue(); 210 return (( Value <= 0x000000000000007FULL)|| 211 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| 212 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 213 } 214 bool isImmSExti32i8() const { 215 if (!isImm()) 216 return false; 217 218 // If this isn't a constant expr, just assume it fits and let relaxation 219 // handle it. 220 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 221 if (!CE) 222 return true; 223 224 // Otherwise, check the value is in a range that makes sense for this 225 // extension. 226 uint64_t Value = CE->getValue(); 227 return (( Value <= 0x000000000000007FULL)|| 228 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| 229 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 230 } 231 bool isImmSExti64i8() const { 232 if (!isImm()) 233 return false; 234 235 // If this isn't a constant expr, just assume it fits and let relaxation 236 // handle it. 237 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 238 if (!CE) 239 return true; 240 241 // Otherwise, check the value is in a range that makes sense for this 242 // extension. 243 uint64_t Value = CE->getValue(); 244 return (( Value <= 0x000000000000007FULL)|| 245 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 246 } 247 bool isImmSExti64i32() const { 248 if (!isImm()) 249 return false; 250 251 // If this isn't a constant expr, just assume it fits and let relaxation 252 // handle it. 253 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 254 if (!CE) 255 return true; 256 257 // Otherwise, check the value is in a range that makes sense for this 258 // extension. 
259 uint64_t Value = CE->getValue(); 260 return (( Value <= 0x000000007FFFFFFFULL)|| 261 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 262 } 263 264 bool isMem() const { return Kind == Memory; } 265 266 bool isAbsMem() const { 267 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 268 !getMemIndexReg() && getMemScale() == 1; 269 } 270 271 bool isReg() const { return Kind == Register; } 272 273 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 274 // Add as immediates when possible. 275 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 276 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 277 else 278 Inst.addOperand(MCOperand::CreateExpr(Expr)); 279 } 280 281 void addRegOperands(MCInst &Inst, unsigned N) const { 282 assert(N == 1 && "Invalid number of operands!"); 283 Inst.addOperand(MCOperand::CreateReg(getReg())); 284 } 285 286 void addImmOperands(MCInst &Inst, unsigned N) const { 287 assert(N == 1 && "Invalid number of operands!"); 288 addExpr(Inst, getImm()); 289 } 290 291 void addMemOperands(MCInst &Inst, unsigned N) const { 292 assert((N == 5) && "Invalid number of operands!"); 293 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 294 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 295 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 296 addExpr(Inst, getMemDisp()); 297 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 298 } 299 300 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 301 assert((N == 1) && "Invalid number of operands!"); 302 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 303 } 304 305 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 306 X86Operand *Res = new X86Operand(Token, Loc, Loc); 307 Res->Tok.Data = Str.data(); 308 Res->Tok.Length = Str.size(); 309 return Res; 310 } 311 312 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { 313 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 314 
Res->Reg.RegNo = RegNo; 315 return Res; 316 } 317 318 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ 319 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 320 Res->Imm.Val = Val; 321 return Res; 322 } 323 324 /// Create an absolute memory operand. 325 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, 326 SMLoc EndLoc) { 327 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 328 Res->Mem.SegReg = 0; 329 Res->Mem.Disp = Disp; 330 Res->Mem.BaseReg = 0; 331 Res->Mem.IndexReg = 0; 332 Res->Mem.Scale = 1; 333 return Res; 334 } 335 336 /// Create a generalized memory operand. 337 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 338 unsigned BaseReg, unsigned IndexReg, 339 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { 340 // We should never just have a displacement, that should be parsed as an 341 // absolute memory operand. 342 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 343 344 // The scale should always be one of {1,2,4,8}. 345 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 346 "Invalid scale!"); 347 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 348 Res->Mem.SegReg = SegReg; 349 Res->Mem.Disp = Disp; 350 Res->Mem.BaseReg = BaseReg; 351 Res->Mem.IndexReg = IndexReg; 352 Res->Mem.Scale = Scale; 353 return Res; 354 } 355}; 356 357} // end anonymous namespace. 358 359 360bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, 361 SMLoc &StartLoc, SMLoc &EndLoc) { 362 RegNo = 0; 363 const AsmToken &TokPercent = Parser.getTok(); 364 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); 365 StartLoc = TokPercent.getLoc(); 366 Parser.Lex(); // Eat percent token. 
367 368 const AsmToken &Tok = Parser.getTok(); 369 if (Tok.isNot(AsmToken::Identifier)) 370 return Error(Tok.getLoc(), "invalid register name"); 371 372 // FIXME: Validate register for the current architecture; we have to do 373 // validation later, so maybe there is no need for this here. 374 RegNo = MatchRegisterName(Tok.getString()); 375 376 // FIXME: This should be done using Requires<In32BitMode> and 377 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions 378 // can be also checked. 379 if (RegNo == X86::RIZ && !Is64Bit) 380 return Error(Tok.getLoc(), "riz register in 64-bit mode only"); 381 382 // Parse %st(1) and "%st" as "%st(0)" 383 if (RegNo == 0 && Tok.getString() == "st") { 384 RegNo = X86::ST0; 385 EndLoc = Tok.getLoc(); 386 Parser.Lex(); // Eat 'st' 387 388 // Check to see if we have '(4)' after %st. 389 if (getLexer().isNot(AsmToken::LParen)) 390 return false; 391 // Lex the paren. 392 getParser().Lex(); 393 394 const AsmToken &IntTok = Parser.getTok(); 395 if (IntTok.isNot(AsmToken::Integer)) 396 return Error(IntTok.getLoc(), "expected stack index"); 397 switch (IntTok.getIntVal()) { 398 case 0: RegNo = X86::ST0; break; 399 case 1: RegNo = X86::ST1; break; 400 case 2: RegNo = X86::ST2; break; 401 case 3: RegNo = X86::ST3; break; 402 case 4: RegNo = X86::ST4; break; 403 case 5: RegNo = X86::ST5; break; 404 case 6: RegNo = X86::ST6; break; 405 case 7: RegNo = X86::ST7; break; 406 default: return Error(IntTok.getLoc(), "invalid stack index"); 407 } 408 409 if (getParser().Lex().isNot(AsmToken::RParen)) 410 return Error(Parser.getTok().getLoc(), "expected ')'"); 411 412 EndLoc = Tok.getLoc(); 413 Parser.Lex(); // Eat ')' 414 return false; 415 } 416 417 // If this is "db[0-7]", match it as an alias 418 // for dr[0-7]. 
419 if (RegNo == 0 && Tok.getString().size() == 3 && 420 Tok.getString().startswith("db")) { 421 switch (Tok.getString()[2]) { 422 case '0': RegNo = X86::DR0; break; 423 case '1': RegNo = X86::DR1; break; 424 case '2': RegNo = X86::DR2; break; 425 case '3': RegNo = X86::DR3; break; 426 case '4': RegNo = X86::DR4; break; 427 case '5': RegNo = X86::DR5; break; 428 case '6': RegNo = X86::DR6; break; 429 case '7': RegNo = X86::DR7; break; 430 } 431 432 if (RegNo != 0) { 433 EndLoc = Tok.getLoc(); 434 Parser.Lex(); // Eat it. 435 return false; 436 } 437 } 438 439 if (RegNo == 0) 440 return Error(Tok.getLoc(), "invalid register name"); 441 442 EndLoc = Tok.getLoc(); 443 Parser.Lex(); // Eat identifier token. 444 return false; 445} 446 447X86Operand *X86ATTAsmParser::ParseOperand() { 448 switch (getLexer().getKind()) { 449 default: 450 // Parse a memory operand with no segment register. 451 return ParseMemOperand(0, Parser.getTok().getLoc()); 452 case AsmToken::Percent: { 453 // Read the register. 454 unsigned RegNo; 455 SMLoc Start, End; 456 if (ParseRegister(RegNo, Start, End)) return 0; 457 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 458 Error(Start, "eiz and riz can only be used as index registers"); 459 return 0; 460 } 461 462 // If this is a segment register followed by a ':', then this is the start 463 // of a memory reference, otherwise this is a normal register reference. 464 if (getLexer().isNot(AsmToken::Colon)) 465 return X86Operand::CreateReg(RegNo, Start, End); 466 467 468 getParser().Lex(); // Eat the colon. 469 return ParseMemOperand(RegNo, Start); 470 } 471 case AsmToken::Dollar: { 472 // $42 -> immediate. 473 SMLoc Start = Parser.getTok().getLoc(), End; 474 Parser.Lex(); 475 const MCExpr *Val; 476 if (getParser().ParseExpression(Val, End)) 477 return 0; 478 return X86Operand::CreateImm(Val, Start, End); 479 } 480 } 481} 482 483/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 484/// has already been parsed if present. 
485X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 486 487 // We have to disambiguate a parenthesized expression "(4+5)" from the start 488 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 489 // only way to do this without lookahead is to eat the '(' and see what is 490 // after it. 491 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 492 if (getLexer().isNot(AsmToken::LParen)) { 493 SMLoc ExprEnd; 494 if (getParser().ParseExpression(Disp, ExprEnd)) return 0; 495 496 // After parsing the base expression we could either have a parenthesized 497 // memory address or not. If not, return now. If so, eat the (. 498 if (getLexer().isNot(AsmToken::LParen)) { 499 // Unless we have a segment register, treat this as an immediate. 500 if (SegReg == 0) 501 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 502 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 503 } 504 505 // Eat the '('. 506 Parser.Lex(); 507 } else { 508 // Okay, we have a '('. We don't know if this is an expression or not, but 509 // so we have to eat the ( to see beyond it. 510 SMLoc LParenLoc = Parser.getTok().getLoc(); 511 Parser.Lex(); // Eat the '('. 512 513 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 514 // Nothing to do here, fall into the code below with the '(' part of the 515 // memory operand consumed. 516 } else { 517 SMLoc ExprEnd; 518 519 // It must be an parenthesized expression, parse it now. 520 if (getParser().ParseParenExpression(Disp, ExprEnd)) 521 return 0; 522 523 // After parsing the base expression we could either have a parenthesized 524 // memory address or not. If not, return now. If so, eat the (. 525 if (getLexer().isNot(AsmToken::LParen)) { 526 // Unless we have a segment register, treat this as an immediate. 
527 if (SegReg == 0) 528 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 529 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 530 } 531 532 // Eat the '('. 533 Parser.Lex(); 534 } 535 } 536 537 // If we reached here, then we just ate the ( of the memory operand. Process 538 // the rest of the memory operand. 539 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 540 541 if (getLexer().is(AsmToken::Percent)) { 542 SMLoc L; 543 if (ParseRegister(BaseReg, L, L)) return 0; 544 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 545 Error(L, "eiz and riz can only be used as index registers"); 546 return 0; 547 } 548 } 549 550 if (getLexer().is(AsmToken::Comma)) { 551 Parser.Lex(); // Eat the comma. 552 553 // Following the comma we should have either an index register, or a scale 554 // value. We don't support the later form, but we want to parse it 555 // correctly. 556 // 557 // Not that even though it would be completely consistent to support syntax 558 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 559 if (getLexer().is(AsmToken::Percent)) { 560 SMLoc L; 561 if (ParseRegister(IndexReg, L, L)) return 0; 562 563 if (getLexer().isNot(AsmToken::RParen)) { 564 // Parse the scale amount: 565 // ::= ',' [scale-expression] 566 if (getLexer().isNot(AsmToken::Comma)) { 567 Error(Parser.getTok().getLoc(), 568 "expected comma in scale expression"); 569 return 0; 570 } 571 Parser.Lex(); // Eat the comma. 572 573 if (getLexer().isNot(AsmToken::RParen)) { 574 SMLoc Loc = Parser.getTok().getLoc(); 575 576 int64_t ScaleVal; 577 if (getParser().ParseAbsoluteExpression(ScaleVal)) 578 return 0; 579 580 // Validate the scale amount. 581 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 582 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 583 return 0; 584 } 585 Scale = (unsigned)ScaleVal; 586 } 587 } 588 } else if (getLexer().isNot(AsmToken::RParen)) { 589 // A scale amount without an index is ignored. 
590 // index. 591 SMLoc Loc = Parser.getTok().getLoc(); 592 593 int64_t Value; 594 if (getParser().ParseAbsoluteExpression(Value)) 595 return 0; 596 597 if (Value != 1) 598 Warning(Loc, "scale factor without index register is ignored"); 599 Scale = 1; 600 } 601 } 602 603 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 604 if (getLexer().isNot(AsmToken::RParen)) { 605 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 606 return 0; 607 } 608 SMLoc MemEnd = Parser.getTok().getLoc(); 609 Parser.Lex(); // Eat the ')'. 610 611 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 612 MemStart, MemEnd); 613} 614 615bool X86ATTAsmParser:: 616ParseInstruction(StringRef Name, SMLoc NameLoc, 617 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 618 // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to 619 // represent alternative syntaxes in the .td file, without requiring 620 // instruction duplication. 621 StringRef PatchedName = StringSwitch<StringRef>(Name) 622 .Case("sal", "shl") 623 .Case("salb", "shlb") 624 .Case("sall", "shll") 625 .Case("salq", "shlq") 626 .Case("salw", "shlw") 627 .Case("repe", "rep") 628 .Case("repz", "rep") 629 .Case("repnz", "repne") 630 .Case("iret", "iretl") 631 .Case("sysret", "sysretl") 632 .Case("push", Is64Bit ? "pushq" : "pushl") 633 .Case("pop", Is64Bit ? "popq" : "popl") 634 .Case("pushf", Is64Bit ? "pushfq" : "pushfl") 635 .Case("popf", Is64Bit ? "popfq" : "popfl") 636 .Case("pushfd", "pushfl") 637 .Case("popfd", "popfl") 638 .Case("retl", Is64Bit ? "retl" : "ret") 639 .Case("retq", Is64Bit ? 
"ret" : "retq") 640 .Case("setz", "sete") .Case("setnz", "setne") 641 .Case("setc", "setb") .Case("setna", "setbe") 642 .Case("setnae", "setb").Case("setnb", "setae") 643 .Case("setnbe", "seta").Case("setnc", "setae") 644 .Case("setng", "setle").Case("setnge", "setl") 645 .Case("setnl", "setge").Case("setnle", "setg") 646 .Case("setpe", "setp") .Case("setpo", "setnp") 647 .Case("jz", "je") .Case("jnz", "jne") 648 .Case("jc", "jb") .Case("jna", "jbe") 649 .Case("jnae", "jb").Case("jnb", "jae") 650 .Case("jnbe", "ja").Case("jnc", "jae") 651 .Case("jng", "jle").Case("jnge", "jl") 652 .Case("jnl", "jge").Case("jnle", "jg") 653 .Case("jpe", "jp") .Case("jpo", "jnp") 654 // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands. 655 .Case("cmovcw", "cmovbw") .Case("cmovcl", "cmovbl") 656 .Case("cmovcq", "cmovbq") .Case("cmovc", "cmovb") 657 .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl") 658 .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb") 659 .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel") 660 .Case("cmovnaq", "cmovbeq").Case("cmovna", "cmovbe") 661 .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael") 662 .Case("cmovnbq", "cmovaeq").Case("cmovnb", "cmovae") 663 .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval") 664 .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova") 665 .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael") 666 .Case("cmovncq", "cmovaeq").Case("cmovnc", "cmovae") 667 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel") 668 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle") 669 .Case("cmovnw", "cmovgew").Case("cmovnl", "cmovgel") 670 .Case("cmovnq", "cmovgeq").Case("cmovn", "cmovge") 671 .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel") 672 .Case("cmovngq", "cmovleq").Case("cmovng", "cmovle") 673 .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll") 674 .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl") 675 .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel") 676 .Case("cmovnlq", 
"cmovgeq").Case("cmovnl", "cmovge") 677 .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl") 678 .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg") 679 .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel") 680 .Case("cmovnzq", "cmovneq").Case("cmovnz", "cmovne") 681 .Case("cmovzw", "cmovew") .Case("cmovzl", "cmovel") 682 .Case("cmovzq", "cmoveq") .Case("cmovz", "cmove") 683 .Case("fwait", "wait") 684 .Case("movzx", "movzb") 685 .Default(Name); 686 687 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 688 const MCExpr *ExtraImmOp = 0; 689 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 690 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 691 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 692 bool IsVCMP = PatchedName.startswith("vcmp"); 693 unsigned SSECCIdx = IsVCMP ? 4 : 3; 694 unsigned SSEComparisonCode = StringSwitch<unsigned>( 695 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 696 .Case("eq", 0) 697 .Case("lt", 1) 698 .Case("le", 2) 699 .Case("unord", 3) 700 .Case("neq", 4) 701 .Case("nlt", 5) 702 .Case("nle", 6) 703 .Case("ord", 7) 704 .Case("eq_uq", 8) 705 .Case("nge", 9) 706 .Case("ngt", 0x0A) 707 .Case("false", 0x0B) 708 .Case("neq_oq", 0x0C) 709 .Case("ge", 0x0D) 710 .Case("gt", 0x0E) 711 .Case("true", 0x0F) 712 .Case("eq_os", 0x10) 713 .Case("lt_oq", 0x11) 714 .Case("le_oq", 0x12) 715 .Case("unord_s", 0x13) 716 .Case("neq_us", 0x14) 717 .Case("nlt_uq", 0x15) 718 .Case("nle_uq", 0x16) 719 .Case("ord_s", 0x17) 720 .Case("eq_us", 0x18) 721 .Case("nge_uq", 0x19) 722 .Case("ngt_uq", 0x1A) 723 .Case("false_os", 0x1B) 724 .Case("neq_os", 0x1C) 725 .Case("ge_oq", 0x1D) 726 .Case("gt_oq", 0x1E) 727 .Case("true_us", 0x1F) 728 .Default(~0U); 729 if (SSEComparisonCode != ~0U) { 730 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 731 getParser().getContext()); 732 if (PatchedName.endswith("ss")) { 733 PatchedName = IsVCMP ? 
"vcmpss" : "cmpss"; 734 } else if (PatchedName.endswith("sd")) { 735 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 736 } else if (PatchedName.endswith("ps")) { 737 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 738 } else { 739 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 740 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 741 } 742 } 743 } 744 745 // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq 746 if (PatchedName.startswith("vpclmul")) { 747 unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( 748 PatchedName.slice(7, PatchedName.size() - 2)) 749 .Case("lqlq", 0x00) // src1[63:0], src2[63:0] 750 .Case("hqlq", 0x01) // src1[127:64], src2[63:0] 751 .Case("lqhq", 0x10) // src1[63:0], src2[127:64] 752 .Case("hqhq", 0x11) // src1[127:64], src2[127:64] 753 .Default(~0U); 754 if (CLMULQuadWordSelect != ~0U) { 755 ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, 756 getParser().getContext()); 757 assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); 758 PatchedName = "vpclmulqdq"; 759 } 760 } 761 762 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 763 764 if (ExtraImmOp) 765 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 766 767 768 // Determine whether this is an instruction prefix. 769 bool isPrefix = 770 PatchedName == "lock" || PatchedName == "rep" || 771 PatchedName == "repne"; 772 773 774 // This does the actual operand parsing. Don't parse any more if we have a 775 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 776 // just want to parse the "lock" as the first instruction and the "incl" as 777 // the next one. 778 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 779 780 // Parse '*' modifier. 781 if (getLexer().is(AsmToken::Star)) { 782 SMLoc Loc = Parser.getTok().getLoc(); 783 Operands.push_back(X86Operand::CreateToken("*", Loc)); 784 Parser.Lex(); // Eat the star. 785 } 786 787 // Read the first operand. 
788 if (X86Operand *Op = ParseOperand()) 789 Operands.push_back(Op); 790 else { 791 Parser.EatToEndOfStatement(); 792 return true; 793 } 794 795 while (getLexer().is(AsmToken::Comma)) { 796 Parser.Lex(); // Eat the comma. 797 798 // Parse and remember the operand. 799 if (X86Operand *Op = ParseOperand()) 800 Operands.push_back(Op); 801 else { 802 Parser.EatToEndOfStatement(); 803 return true; 804 } 805 } 806 807 if (getLexer().isNot(AsmToken::EndOfStatement)) { 808 Parser.EatToEndOfStatement(); 809 return TokError("unexpected token in argument list"); 810 } 811 } 812 813 if (getLexer().is(AsmToken::EndOfStatement)) 814 Parser.Lex(); // Consume the EndOfStatement 815 816 // FIXME: Hack to handle recognize s{hr,ar,hl} <op>, $1. Canonicalize to 817 // "shift <op>". 818 if ((Name.startswith("shr") || Name.startswith("sar") || 819 Name.startswith("shl")) && 820 Operands.size() == 3) { 821 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 822 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 823 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 824 delete Operands[1]; 825 Operands.erase(Operands.begin() + 1); 826 } 827 } 828 829 // FIXME: Hack to handle recognize "in[bwl] <op>". Canonicalize it to 830 // "inb <op>, %al". 831 if ((Name == "inb" || Name == "inw" || Name == "inl") && 832 Operands.size() == 2) { 833 unsigned Reg; 834 if (Name[2] == 'b') 835 Reg = MatchRegisterName("al"); 836 else if (Name[2] == 'w') 837 Reg = MatchRegisterName("ax"); 838 else 839 Reg = MatchRegisterName("eax"); 840 SMLoc Loc = Operands.back()->getEndLoc(); 841 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc)); 842 } 843 844 // FIXME: Hack to handle recognize "out[bwl] <op>". Canonicalize it to 845 // "outb %al, <op>". 
846 if ((Name == "outb" || Name == "outw" || Name == "outl") && 847 Operands.size() == 2) { 848 unsigned Reg; 849 if (Name[3] == 'b') 850 Reg = MatchRegisterName("al"); 851 else if (Name[3] == 'w') 852 Reg = MatchRegisterName("ax"); 853 else 854 Reg = MatchRegisterName("eax"); 855 SMLoc Loc = Operands.back()->getEndLoc(); 856 Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc)); 857 std::swap(Operands[1], Operands[2]); 858 } 859 860 // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". 861 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 862 Operands.size() == 3) { 863 X86Operand &Op = *(X86Operand*)Operands.back(); 864 if (Op.isMem() && Op.Mem.SegReg == 0 && 865 isa<MCConstantExpr>(Op.Mem.Disp) && 866 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 867 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 868 SMLoc Loc = Op.getEndLoc(); 869 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 870 delete &Op; 871 } 872 } 873 874 // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as 875 // "f{mul*,add*,sub*,div*} $op" 876 if ((Name.startswith("fmul") || Name.startswith("fadd") || 877 Name.startswith("fsub") || Name.startswith("fdiv")) && 878 Operands.size() == 3 && 879 static_cast<X86Operand*>(Operands[2])->isReg() && 880 static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { 881 delete Operands[2]; 882 Operands.erase(Operands.begin() + 2); 883 } 884 885 // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B, 886 // B". 
887 if (Name.startswith("imul") && Operands.size() == 3 && 888 static_cast<X86Operand*>(Operands[1])->isImm() && 889 static_cast<X86Operand*>(Operands.back())->isReg()) { 890 X86Operand *Op = static_cast<X86Operand*>(Operands.back()); 891 Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), 892 Op->getEndLoc())); 893 } 894 895 // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same 896 // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity 897 // errors, since its encoding is the most compact. 898 if (Name == "sldt" && Operands.size() == 2 && 899 static_cast<X86Operand*>(Operands[1])->isMem()) { 900 delete Operands[0]; 901 Operands[0] = X86Operand::CreateToken("sldtw", NameLoc); 902 } 903 904 // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as 905 // synonyms. Our tables only have the "<reg>, <mem>" form, so if we see the 906 // other operand order, swap them. 907 if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"|| 908 Name == "xchg") 909 if (Operands.size() == 3 && 910 static_cast<X86Operand*>(Operands[1])->isMem() && 911 static_cast<X86Operand*>(Operands[2])->isReg()) { 912 std::swap(Operands[1], Operands[2]); 913 } 914 915 // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as 916 // synonyms. Our tables only have the "<mem>, <reg>" form, so if we see the 917 // other operand order, swap them. 
918 if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"|| 919 Name == "test") 920 if (Operands.size() == 3 && 921 static_cast<X86Operand*>(Operands[1])->isReg() && 922 static_cast<X86Operand*>(Operands[2])->isMem()) { 923 std::swap(Operands[1], Operands[2]); 924 } 925 926 return false; 927} 928 929bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { 930 StringRef IDVal = DirectiveID.getIdentifier(); 931 if (IDVal == ".word") 932 return ParseDirectiveWord(2, DirectiveID.getLoc()); 933 return true; 934} 935 936/// ParseDirectiveWord 937/// ::= .word [ expression (, expression)* ] 938bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 939 if (getLexer().isNot(AsmToken::EndOfStatement)) { 940 for (;;) { 941 const MCExpr *Value; 942 if (getParser().ParseExpression(Value)) 943 return true; 944 945 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 946 947 if (getLexer().is(AsmToken::EndOfStatement)) 948 break; 949 950 // FIXME: Improve diagnostic. 951 if (getLexer().isNot(AsmToken::Comma)) 952 return Error(L, "unexpected token in directive"); 953 Parser.Lex(); 954 } 955 } 956 957 Parser.Lex(); 958 return false; 959} 960 961 962bool 963X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, 964 const SmallVectorImpl<MCParsedAsmOperand*> 965 &Operands, 966 MCInst &Inst) { 967 assert(!Operands.empty() && "Unexpect empty operand list!"); 968 969 bool WasOriginallyInvalidOperand = false; 970 unsigned OrigErrorInfo; 971 972 // First, try a direct match. 
973 switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { 974 case Match_Success: 975 return false; 976 case Match_MissingFeature: 977 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 978 return true; 979 case Match_InvalidOperand: 980 WasOriginallyInvalidOperand = true; 981 break; 982 case Match_MnemonicFail: 983 break; 984 } 985 986 // FIXME: Ideally, we would only attempt suffix matches for things which are 987 // valid prefixes, and we could just infer the right unambiguous 988 // type. However, that requires substantially more matcher support than the 989 // following hack. 990 991 X86Operand *Op = static_cast<X86Operand*>(Operands[0]); 992 assert(Op->isToken() && "Leading operand should always be a mnemonic!"); 993 994 // Change the operand to point to a temporary token. 995 StringRef Base = Op->getToken(); 996 SmallString<16> Tmp; 997 Tmp += Base; 998 Tmp += ' '; 999 Op->setTokenValue(Tmp.str()); 1000 1001 // Check for the various suffix matches. 1002 Tmp[Base.size()] = 'b'; 1003 unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo; 1004 MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo); 1005 Tmp[Base.size()] = 'w'; 1006 MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo); 1007 Tmp[Base.size()] = 'l'; 1008 MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo); 1009 Tmp[Base.size()] = 'q'; 1010 MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo); 1011 1012 // Restore the old token. 1013 Op->setTokenValue(Base); 1014 1015 // If exactly one matched, then we treat that as a successful match (and the 1016 // instruction will already have been filled in correctly, since the failing 1017 // matches won't have modified it). 
1018 unsigned NumSuccessfulMatches = 1019 (MatchB == Match_Success) + (MatchW == Match_Success) + 1020 (MatchL == Match_Success) + (MatchQ == Match_Success); 1021 if (NumSuccessfulMatches == 1) 1022 return false; 1023 1024 // Otherwise, the match failed, try to produce a decent error message. 1025 1026 // If we had multiple suffix matches, then identify this as an ambiguous 1027 // match. 1028 if (NumSuccessfulMatches > 1) { 1029 char MatchChars[4]; 1030 unsigned NumMatches = 0; 1031 if (MatchB == Match_Success) 1032 MatchChars[NumMatches++] = 'b'; 1033 if (MatchW == Match_Success) 1034 MatchChars[NumMatches++] = 'w'; 1035 if (MatchL == Match_Success) 1036 MatchChars[NumMatches++] = 'l'; 1037 if (MatchQ == Match_Success) 1038 MatchChars[NumMatches++] = 'q'; 1039 1040 SmallString<126> Msg; 1041 raw_svector_ostream OS(Msg); 1042 OS << "ambiguous instructions require an explicit suffix (could be "; 1043 for (unsigned i = 0; i != NumMatches; ++i) { 1044 if (i != 0) 1045 OS << ", "; 1046 if (i + 1 == NumMatches) 1047 OS << "or "; 1048 OS << "'" << Base << MatchChars[i] << "'"; 1049 } 1050 OS << ")"; 1051 Error(IDLoc, OS.str()); 1052 return true; 1053 } 1054 1055 // Okay, we know that none of the variants matched successfully. 1056 1057 // If all of the instructions reported an invalid mnemonic, then the original 1058 // mnemonic was invalid. 1059 if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) && 1060 (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) { 1061 if (!WasOriginallyInvalidOperand) { 1062 Error(IDLoc, "invalid instruction mnemonic '" + Base + "'"); 1063 return true; 1064 } 1065 1066 // Recover location info for the operand if we know which was the problem. 
1067 SMLoc ErrorLoc = IDLoc; 1068 if (OrigErrorInfo != ~0U) { 1069 ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc(); 1070 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 1071 } 1072 1073 Error(ErrorLoc, "invalid operand for instruction"); 1074 return true; 1075 } 1076 1077 // If one instruction matched with a missing feature, report this as a 1078 // missing feature. 1079 if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) + 1080 (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){ 1081 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1082 return true; 1083 } 1084 1085 // If one instruction matched with an invalid operand, report this as an 1086 // operand failure. 1087 if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) + 1088 (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){ 1089 Error(IDLoc, "invalid operand for instruction"); 1090 return true; 1091 } 1092 1093 // If all of these were an outright failure, report it in a useless way. 1094 // FIXME: We should give nicer diagnostics about the exact failure. 1095 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); 1096 return true; 1097} 1098 1099 1100extern "C" void LLVMInitializeX86AsmLexer(); 1101 1102// Force static initialization. 1103extern "C" void LLVMInitializeX86AsmParser() { 1104 RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); 1105 RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); 1106 LLVMInitializeX86AsmLexer(); 1107} 1108 1109#define GET_REGISTER_MATCHER 1110#define GET_MATCHER_IMPLEMENTATION 1111#include "X86GenAsmMatcher.inc" 1112