X86AsmParser.cpp revision cf50a5390c09325a7fc41640449205eced4363f6
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/Target/TargetAsmParser.h" 11#include "X86.h" 12#include "llvm/ADT/SmallVector.h" 13#include "llvm/ADT/StringSwitch.h" 14#include "llvm/ADT/Twine.h" 15#include "llvm/MC/MCStreamer.h" 16#include "llvm/MC/MCExpr.h" 17#include "llvm/MC/MCInst.h" 18#include "llvm/MC/MCParser/MCAsmLexer.h" 19#include "llvm/MC/MCParser/MCAsmParser.h" 20#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 21#include "llvm/Support/SourceMgr.h" 22#include "llvm/Target/TargetRegistry.h" 23#include "llvm/Target/TargetAsmParser.h" 24using namespace llvm; 25 26namespace { 27struct X86Operand; 28 29class X86ATTAsmParser : public TargetAsmParser { 30 MCAsmParser &Parser; 31 32protected: 33 unsigned Is64Bit : 1; 34 35private: 36 MCAsmParser &getParser() const { return Parser; } 37 38 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 39 40 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 41 42 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 43 44 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 45 46 X86Operand *ParseOperand(); 47 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 48 49 bool ParseDirectiveWord(unsigned Size, SMLoc L); 50 51 void InstructionCleanup(MCInst &Inst); 52 53 /// @name Auto-generated Match Functions 54 /// { 55 56 bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, 57 MCInst &Inst); 58 59 bool MatchInstructionImpl( 60 const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); 61 62 /// } 63 64public: 65 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) 66 : TargetAsmParser(T), Parser(_Parser) {} 67 68 virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, 69 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 70 71 virtual bool ParseDirective(AsmToken DirectiveID); 72}; 73 74class X86_32ATTAsmParser : public X86ATTAsmParser { 75public: 76 X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser) 77 : X86ATTAsmParser(T, _Parser) { 78 Is64Bit = false; 79 } 80}; 81 82class X86_64ATTAsmParser : public X86ATTAsmParser { 83public: 84 X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser) 85 : X86ATTAsmParser(T, _Parser) { 86 Is64Bit = true; 87 } 88}; 89 90} // end anonymous namespace 91 92/// @name Auto-generated Match Functions 93/// { 94 95static unsigned MatchRegisterName(StringRef Name); 96 97/// } 98 99namespace { 100 101/// X86Operand - Instances of this class represent a parsed X86 machine 102/// instruction. 103struct X86Operand : public MCParsedAsmOperand { 104 enum KindTy { 105 Token, 106 Register, 107 Immediate, 108 Memory 109 } Kind; 110 111 SMLoc StartLoc, EndLoc; 112 113 union { 114 struct { 115 const char *Data; 116 unsigned Length; 117 } Tok; 118 119 struct { 120 unsigned RegNo; 121 } Reg; 122 123 struct { 124 const MCExpr *Val; 125 } Imm; 126 127 struct { 128 unsigned SegReg; 129 const MCExpr *Disp; 130 unsigned BaseReg; 131 unsigned IndexReg; 132 unsigned Scale; 133 } Mem; 134 }; 135 136 X86Operand(KindTy K, SMLoc Start, SMLoc End) 137 : Kind(K), StartLoc(Start), EndLoc(End) {} 138 139 /// getStartLoc - Get the location of the first token of this operand. 140 SMLoc getStartLoc() const { return StartLoc; } 141 /// getEndLoc - Get the location of the last token of this operand. 142 SMLoc getEndLoc() const { return EndLoc; } 143 144 StringRef getToken() const { 145 assert(Kind == Token && "Invalid access!"); 146 return StringRef(Tok.Data, Tok.Length); 147 } 148 void setTokenValue(StringRef Value) { 149 assert(Kind == Token && "Invalid access!"); 150 Tok.Data = Value.data(); 151 Tok.Length = Value.size(); 152 } 153 154 unsigned getReg() const { 155 assert(Kind == Register && "Invalid access!"); 156 return Reg.RegNo; 157 } 158 159 const MCExpr *getImm() const { 160 assert(Kind == Immediate && "Invalid access!"); 161 return Imm.Val; 162 } 163 164 const MCExpr *getMemDisp() const { 165 assert(Kind == Memory && "Invalid access!"); 166 return Mem.Disp; 167 } 168 unsigned getMemSegReg() const { 169 assert(Kind == Memory && "Invalid access!"); 170 return Mem.SegReg; 171 } 172 unsigned getMemBaseReg() const { 173 assert(Kind == Memory && "Invalid access!"); 174 return Mem.BaseReg; 175 } 176 unsigned getMemIndexReg() const { 177 assert(Kind == Memory && "Invalid access!"); 178 return Mem.IndexReg; 179 } 180 unsigned getMemScale() const { 181 assert(Kind == Memory && "Invalid access!"); 182 return Mem.Scale; 183 } 184 185 bool isToken() const {return Kind == Token; } 186 187 bool isImm() const { return Kind == Immediate; } 188 189 bool isImmSExti16i8() const { 190 if (!isImm()) 191 return false; 192 193 // If this isn't a constant expr, just assume it fits and let relaxation 194 // handle it. 195 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 196 if (!CE) 197 return true; 198 199 // Otherwise, check the value is in a range that makes sense for this 200 // extension. 201 uint64_t Value = CE->getValue(); 202 return (( Value <= 0x000000000000007FULL)|| 203 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| 204 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 205 } 206 bool isImmSExti32i8() const { 207 if (!isImm()) 208 return false; 209 210 // If this isn't a constant expr, just assume it fits and let relaxation 211 // handle it. 212 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 213 if (!CE) 214 return true; 215 216 // Otherwise, check the value is in a range that makes sense for this 217 // extension. 218 uint64_t Value = CE->getValue(); 219 return (( Value <= 0x000000000000007FULL)|| 220 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| 221 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 222 } 223 bool isImmSExti64i8() const { 224 if (!isImm()) 225 return false; 226 227 // If this isn't a constant expr, just assume it fits and let relaxation 228 // handle it. 229 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 230 if (!CE) 231 return true; 232 233 // Otherwise, check the value is in a range that makes sense for this 234 // extension. 235 uint64_t Value = CE->getValue(); 236 return (( Value <= 0x000000000000007FULL)|| 237 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 238 } 239 bool isImmSExti64i32() const { 240 if (!isImm()) 241 return false; 242 243 // If this isn't a constant expr, just assume it fits and let relaxation 244 // handle it. 245 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 246 if (!CE) 247 return true; 248 249 // Otherwise, check the value is in a range that makes sense for this 250 // extension. 251 uint64_t Value = CE->getValue(); 252 return (( Value <= 0x000000007FFFFFFFULL)|| 253 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 254 } 255 256 bool isMem() const { return Kind == Memory; } 257 258 bool isAbsMem() const { 259 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 260 !getMemIndexReg() && getMemScale() == 1; 261 } 262 263 bool isNoSegMem() const { 264 return Kind == Memory && !getMemSegReg(); 265 } 266 267 bool isReg() const { return Kind == Register; } 268 269 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 270 // Add as immediates when possible. 271 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 272 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 273 else 274 Inst.addOperand(MCOperand::CreateExpr(Expr)); 275 } 276 277 void addRegOperands(MCInst &Inst, unsigned N) const { 278 assert(N == 1 && "Invalid number of operands!"); 279 Inst.addOperand(MCOperand::CreateReg(getReg())); 280 } 281 282 void addImmOperands(MCInst &Inst, unsigned N) const { 283 assert(N == 1 && "Invalid number of operands!"); 284 addExpr(Inst, getImm()); 285 } 286 287 void addMemOperands(MCInst &Inst, unsigned N) const { 288 assert((N == 5) && "Invalid number of operands!"); 289 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 290 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 291 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 292 addExpr(Inst, getMemDisp()); 293 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 294 } 295 296 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 297 assert((N == 1) && "Invalid number of operands!"); 298 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 299 } 300 301 void addNoSegMemOperands(MCInst &Inst, unsigned N) const { 302 assert((N == 4) && "Invalid number of operands!"); 303 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 304 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 305 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 306 addExpr(Inst, getMemDisp()); 307 } 308 309 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 310 X86Operand *Res = new X86Operand(Token, Loc, Loc); 311 Res->Tok.Data = Str.data(); 312 Res->Tok.Length = Str.size(); 313 return Res; 314 } 315 316 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { 317 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 318 Res->Reg.RegNo = RegNo; 319 return Res; 320 } 321 322 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ 323 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 324 Res->Imm.Val = Val; 325 return Res; 326 } 327 328 /// Create an absolute memory operand. 329 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, 330 SMLoc EndLoc) { 331 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 332 Res->Mem.SegReg = 0; 333 Res->Mem.Disp = Disp; 334 Res->Mem.BaseReg = 0; 335 Res->Mem.IndexReg = 0; 336 Res->Mem.Scale = 1; 337 return Res; 338 } 339 340 /// Create a generalized memory operand. 341 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 342 unsigned BaseReg, unsigned IndexReg, 343 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { 344 // We should never just have a displacement, that should be parsed as an 345 // absolute memory operand. 346 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 347 348 // The scale should always be one of {1,2,4,8}. 349 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 350 "Invalid scale!"); 351 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 352 Res->Mem.SegReg = SegReg; 353 Res->Mem.Disp = Disp; 354 Res->Mem.BaseReg = BaseReg; 355 Res->Mem.IndexReg = IndexReg; 356 Res->Mem.Scale = Scale; 357 return Res; 358 } 359}; 360 361} // end anonymous namespace. 362 363 364bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, 365 SMLoc &StartLoc, SMLoc &EndLoc) { 366 RegNo = 0; 367 const AsmToken &TokPercent = Parser.getTok(); 368 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); 369 StartLoc = TokPercent.getLoc(); 370 Parser.Lex(); // Eat percent token. 371 372 const AsmToken &Tok = Parser.getTok(); 373 if (Tok.isNot(AsmToken::Identifier)) 374 return Error(Tok.getLoc(), "invalid register name"); 375 376 // FIXME: Validate register for the current architecture; we have to do 377 // validation later, so maybe there is no need for this here. 378 RegNo = MatchRegisterName(Tok.getString()); 379 380 // Parse %st(1) and "%st" as "%st(0)" 381 if (RegNo == 0 && Tok.getString() == "st") { 382 RegNo = X86::ST0; 383 EndLoc = Tok.getLoc(); 384 Parser.Lex(); // Eat 'st' 385 386 // Check to see if we have '(4)' after %st. 387 if (getLexer().isNot(AsmToken::LParen)) 388 return false; 389 // Lex the paren. 390 getParser().Lex(); 391 392 const AsmToken &IntTok = Parser.getTok(); 393 if (IntTok.isNot(AsmToken::Integer)) 394 return Error(IntTok.getLoc(), "expected stack index"); 395 switch (IntTok.getIntVal()) { 396 case 0: RegNo = X86::ST0; break; 397 case 1: RegNo = X86::ST1; break; 398 case 2: RegNo = X86::ST2; break; 399 case 3: RegNo = X86::ST3; break; 400 case 4: RegNo = X86::ST4; break; 401 case 5: RegNo = X86::ST5; break; 402 case 6: RegNo = X86::ST6; break; 403 case 7: RegNo = X86::ST7; break; 404 default: return Error(IntTok.getLoc(), "invalid stack index"); 405 } 406 407 if (getParser().Lex().isNot(AsmToken::RParen)) 408 return Error(Parser.getTok().getLoc(), "expected ')'"); 409 410 EndLoc = Tok.getLoc(); 411 Parser.Lex(); // Eat ')' 412 return false; 413 } 414 415 if (RegNo == 0) 416 return Error(Tok.getLoc(), "invalid register name"); 417 418 EndLoc = Tok.getLoc(); 419 Parser.Lex(); // Eat identifier token. 420 return false; 421} 422 423X86Operand *X86ATTAsmParser::ParseOperand() { 424 switch (getLexer().getKind()) { 425 default: 426 // Parse a memory operand with no segment register. 427 return ParseMemOperand(0, Parser.getTok().getLoc()); 428 case AsmToken::Percent: { 429 // Read the register. 430 unsigned RegNo; 431 SMLoc Start, End; 432 if (ParseRegister(RegNo, Start, End)) return 0; 433 434 // If this is a segment register followed by a ':', then this is the start 435 // of a memory reference, otherwise this is a normal register reference. 436 if (getLexer().isNot(AsmToken::Colon)) 437 return X86Operand::CreateReg(RegNo, Start, End); 438 439 440 getParser().Lex(); // Eat the colon. 441 return ParseMemOperand(RegNo, Start); 442 } 443 case AsmToken::Dollar: { 444 // $42 -> immediate. 445 SMLoc Start = Parser.getTok().getLoc(), End; 446 Parser.Lex(); 447 const MCExpr *Val; 448 if (getParser().ParseExpression(Val, End)) 449 return 0; 450 return X86Operand::CreateImm(Val, Start, End); 451 } 452 } 453} 454 455/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 456/// has already been parsed if present. 457X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 458 459 // We have to disambiguate a parenthesized expression "(4+5)" from the start 460 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 461 // only way to do this without lookahead is to eat the '(' and see what is 462 // after it. 463 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 464 if (getLexer().isNot(AsmToken::LParen)) { 465 SMLoc ExprEnd; 466 if (getParser().ParseExpression(Disp, ExprEnd)) return 0; 467 468 // After parsing the base expression we could either have a parenthesized 469 // memory address or not. If not, return now. If so, eat the (. 470 if (getLexer().isNot(AsmToken::LParen)) { 471 // Unless we have a segment register, treat this as an immediate. 472 if (SegReg == 0) 473 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 474 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 475 } 476 477 // Eat the '('. 478 Parser.Lex(); 479 } else { 480 // Okay, we have a '('. We don't know if this is an expression or not, but 481 // so we have to eat the ( to see beyond it. 482 SMLoc LParenLoc = Parser.getTok().getLoc(); 483 Parser.Lex(); // Eat the '('. 484 485 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 486 // Nothing to do here, fall into the code below with the '(' part of the 487 // memory operand consumed. 488 } else { 489 SMLoc ExprEnd; 490 491 // It must be an parenthesized expression, parse it now. 492 if (getParser().ParseParenExpression(Disp, ExprEnd)) 493 return 0; 494 495 // After parsing the base expression we could either have a parenthesized 496 // memory address or not. If not, return now. If so, eat the (. 497 if (getLexer().isNot(AsmToken::LParen)) { 498 // Unless we have a segment register, treat this as an immediate. 499 if (SegReg == 0) 500 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 501 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 502 } 503 504 // Eat the '('. 505 Parser.Lex(); 506 } 507 } 508 509 // If we reached here, then we just ate the ( of the memory operand. Process 510 // the rest of the memory operand. 511 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 512 513 if (getLexer().is(AsmToken::Percent)) { 514 SMLoc L; 515 if (ParseRegister(BaseReg, L, L)) return 0; 516 } 517 518 if (getLexer().is(AsmToken::Comma)) { 519 Parser.Lex(); // Eat the comma. 520 521 // Following the comma we should have either an index register, or a scale 522 // value. We don't support the later form, but we want to parse it 523 // correctly. 524 // 525 // Not that even though it would be completely consistent to support syntax 526 // like "1(%eax,,1)", the assembler doesn't. 527 if (getLexer().is(AsmToken::Percent)) { 528 SMLoc L; 529 if (ParseRegister(IndexReg, L, L)) return 0; 530 531 if (getLexer().isNot(AsmToken::RParen)) { 532 // Parse the scale amount: 533 // ::= ',' [scale-expression] 534 if (getLexer().isNot(AsmToken::Comma)) { 535 Error(Parser.getTok().getLoc(), 536 "expected comma in scale expression"); 537 return 0; 538 } 539 Parser.Lex(); // Eat the comma. 540 541 if (getLexer().isNot(AsmToken::RParen)) { 542 SMLoc Loc = Parser.getTok().getLoc(); 543 544 int64_t ScaleVal; 545 if (getParser().ParseAbsoluteExpression(ScaleVal)) 546 return 0; 547 548 // Validate the scale amount. 549 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 550 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 551 return 0; 552 } 553 Scale = (unsigned)ScaleVal; 554 } 555 } 556 } else if (getLexer().isNot(AsmToken::RParen)) { 557 // Otherwise we have the unsupported form of a scale amount without an 558 // index. 559 SMLoc Loc = Parser.getTok().getLoc(); 560 561 int64_t Value; 562 if (getParser().ParseAbsoluteExpression(Value)) 563 return 0; 564 565 Error(Loc, "cannot have scale factor without index register"); 566 return 0; 567 } 568 } 569 570 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 571 if (getLexer().isNot(AsmToken::RParen)) { 572 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 573 return 0; 574 } 575 SMLoc MemEnd = Parser.getTok().getLoc(); 576 Parser.Lex(); // Eat the ')'. 577 578 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 579 MemStart, MemEnd); 580} 581 582bool X86ATTAsmParser:: 583ParseInstruction(const StringRef &Name, SMLoc NameLoc, 584 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 585 // The various flavors of pushf and popf use Requires<In32BitMode> and 586 // Requires<In64BitMode>, but the assembler doesn't yet implement that. 587 // For now, just do a manual check to prevent silent misencoding. 588 if (Is64Bit) { 589 if (Name == "popfl") 590 return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); 591 else if (Name == "pushfl") 592 return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); 593 } else { 594 if (Name == "popfq") 595 return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); 596 else if (Name == "pushfq") 597 return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); 598 } 599 600 // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to 601 // represent alternative syntaxes in the .td file, without requiring 602 // instruction duplication. 603 StringRef PatchedName = StringSwitch<StringRef>(Name) 604 .Case("sal", "shl") 605 .Case("salb", "shlb") 606 .Case("sall", "shll") 607 .Case("salq", "shlq") 608 .Case("salw", "shlw") 609 .Case("repe", "rep") 610 .Case("repz", "rep") 611 .Case("repnz", "repne") 612 .Case("pushf", Is64Bit ? "pushfq" : "pushfl") 613 .Case("popf", Is64Bit ? "popfq" : "popfl") 614 .Case("retl", Is64Bit ? "retl" : "ret") 615 .Case("retq", Is64Bit ? "ret" : "retq") 616 .Case("setz", "sete") 617 .Case("setnz", "setne") 618 .Case("jz", "je") 619 .Case("jnz", "jne") 620 .Case("cmovcl", "cmovbl") 621 .Case("cmovcl", "cmovbl") 622 .Case("cmovnal", "cmovbel") 623 .Case("cmovnbl", "cmovael") 624 .Case("cmovnbel", "cmoval") 625 .Case("cmovncl", "cmovael") 626 .Case("cmovngl", "cmovlel") 627 .Case("cmovnl", "cmovgel") 628 .Case("cmovngl", "cmovlel") 629 .Case("cmovngel", "cmovll") 630 .Case("cmovnll", "cmovgel") 631 .Case("cmovnlel", "cmovgl") 632 .Case("cmovnzl", "cmovnel") 633 .Case("cmovzl", "cmovel") 634 .Default(Name); 635 636 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 637 const MCExpr *ExtraImmOp = 0; 638 if (PatchedName.startswith("cmp") && 639 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 640 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 641 unsigned SSEComparisonCode = StringSwitch<unsigned>( 642 PatchedName.slice(3, PatchedName.size() - 2)) 643 .Case("eq", 0) 644 .Case("lt", 1) 645 .Case("le", 2) 646 .Case("unord", 3) 647 .Case("neq", 4) 648 .Case("nlt", 5) 649 .Case("nle", 6) 650 .Case("ord", 7) 651 .Default(~0U); 652 if (SSEComparisonCode != ~0U) { 653 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 654 getParser().getContext()); 655 if (PatchedName.endswith("ss")) { 656 PatchedName = "cmpss"; 657 } else if (PatchedName.endswith("sd")) { 658 PatchedName = "cmpsd"; 659 } else if (PatchedName.endswith("ps")) { 660 PatchedName = "cmpps"; 661 } else { 662 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 663 PatchedName = "cmppd"; 664 } 665 } 666 } 667 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 668 669 if (ExtraImmOp) 670 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 671 672 if (getLexer().isNot(AsmToken::EndOfStatement)) { 673 674 // Parse '*' modifier. 675 if (getLexer().is(AsmToken::Star)) { 676 SMLoc Loc = Parser.getTok().getLoc(); 677 Operands.push_back(X86Operand::CreateToken("*", Loc)); 678 Parser.Lex(); // Eat the star. 679 } 680 681 // Read the first operand. 682 if (X86Operand *Op = ParseOperand()) 683 Operands.push_back(Op); 684 else 685 return true; 686 687 while (getLexer().is(AsmToken::Comma)) { 688 Parser.Lex(); // Eat the comma. 689 690 // Parse and remember the operand. 691 if (X86Operand *Op = ParseOperand()) 692 Operands.push_back(Op); 693 else 694 return true; 695 } 696 } 697 698 // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1. 699 if ((Name.startswith("shr") || Name.startswith("sar") || 700 Name.startswith("shl")) && 701 Operands.size() == 3 && 702 static_cast<X86Operand*>(Operands[1])->isImm() && 703 isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) && 704 cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) { 705 delete Operands[1]; 706 Operands.erase(Operands.begin() + 1); 707 } 708 709 // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as 710 // "f{mul*,add*,sub*,div*} $op" 711 if ((Name.startswith("fmul") || Name.startswith("fadd") || 712 Name.startswith("fsub") || Name.startswith("fdiv")) && 713 Operands.size() == 3 && 714 static_cast<X86Operand*>(Operands[2])->isReg() && 715 static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { 716 delete Operands[2]; 717 Operands.erase(Operands.begin() + 2); 718 } 719 720 return false; 721} 722 723bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { 724 StringRef IDVal = DirectiveID.getIdentifier(); 725 if (IDVal == ".word") 726 return ParseDirectiveWord(2, DirectiveID.getLoc()); 727 return true; 728} 729 730/// ParseDirectiveWord 731/// ::= .word [ expression (, expression)* ] 732bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 733 if (getLexer().isNot(AsmToken::EndOfStatement)) { 734 for (;;) { 735 const MCExpr *Value; 736 if (getParser().ParseExpression(Value)) 737 return true; 738 739 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 740 741 if (getLexer().is(AsmToken::EndOfStatement)) 742 break; 743 744 // FIXME: Improve diagnostic. 745 if (getLexer().isNot(AsmToken::Comma)) 746 return Error(L, "unexpected token in directive"); 747 Parser.Lex(); 748 } 749 } 750 751 Parser.Lex(); 752 return false; 753} 754 755/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a 756/// imm operand, to having "rm" or "mr" operands with the offset in the disp 757/// field. 758static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, 759 bool isMR) { 760 MCOperand Disp = Inst.getOperand(0); 761 762 // Start over with an empty instruction. 763 Inst = MCInst(); 764 Inst.setOpcode(Opc); 765 766 if (!isMR) 767 Inst.addOperand(MCOperand::CreateReg(RegNo)); 768 769 // Add the mem operand. 770 Inst.addOperand(MCOperand::CreateReg(0)); // Segment 771 Inst.addOperand(MCOperand::CreateImm(1)); // Scale 772 Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg 773 Inst.addOperand(Disp); // Displacement 774 Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg 775 776 if (isMR) 777 Inst.addOperand(MCOperand::CreateReg(RegNo)); 778} 779 780// FIXME: Custom X86 cleanup function to implement a temporary hack to handle 781// matching INCL/DECL correctly for x86_64. This needs to be replaced by a 782// proper mechanism for supporting (ambiguous) feature dependent instructions. 783void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { 784 if (!Is64Bit) return; 785 786 switch (Inst.getOpcode()) { 787 case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break; 788 case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break; 789 case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break; 790 case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break; 791 case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break; 792 case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; 793 case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; 794 case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; 795 796 // moffset instructions are x86-32 only. 797 case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; 798 case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; 799 case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; 800 case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; 801 case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; 802 case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; 803 } 804} 805 806bool 807X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> 808 &Operands, 809 MCInst &Inst) { 810 // First, try a direct match. 811 if (!MatchInstructionImpl(Operands, Inst)) 812 return false; 813 814 // Ignore anything which is obviously not a suffix match. 815 if (Operands.size() == 0) 816 return true; 817 X86Operand *Op = static_cast<X86Operand*>(Operands[0]); 818 if (!Op->isToken() || Op->getToken().size() > 15) 819 return true; 820 821 // FIXME: Ideally, we would only attempt suffix matches for things which are 822 // valid prefixes, and we could just infer the right unambiguous 823 // type. However, that requires substantially more matcher support than the 824 // following hack. 825 826 // Change the operand to point to a temporary token. 827 char Tmp[16]; 828 StringRef Base = Op->getToken(); 829 memcpy(Tmp, Base.data(), Base.size()); 830 Op->setTokenValue(StringRef(Tmp, Base.size() + 1)); 831 832 // Check for the various suffix matches. 833 Tmp[Base.size()] = 'b'; 834 bool MatchB = MatchInstructionImpl(Operands, Inst); 835 Tmp[Base.size()] = 'w'; 836 bool MatchW = MatchInstructionImpl(Operands, Inst); 837 Tmp[Base.size()] = 'l'; 838 bool MatchL = MatchInstructionImpl(Operands, Inst); 839 Tmp[Base.size()] = 'q'; 840 bool MatchQ = MatchInstructionImpl(Operands, Inst); 841 842 // Restore the old token. 843 Op->setTokenValue(Base); 844 845 // If exactly one matched, then we treat that as a successful match (and the 846 // instruction will already have been filled in correctly, since the failing 847 // matches won't have modified it). 848 if (MatchB + MatchW + MatchL + MatchQ == 3) 849 return false; 850 851 // Otherwise, the match failed. 852 return true; 853} 854 855 856extern "C" void LLVMInitializeX86AsmLexer(); 857 858// Force static initialization. 859extern "C" void LLVMInitializeX86AsmParser() { 860 RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); 861 RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); 862 LLVMInitializeX86AsmLexer(); 863} 864 865#include "X86GenAsmMatcher.inc" 866