X86AsmParser.cpp revision ffc0e73046f737d75e0a62b3a83ef19bcef111e3
//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "X86Subtarget.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

#define GET_SUBTARGETINFO_ENUM
#include "X86GenSubtargetInfo.inc"

using namespace llvm;

namespace {
struct X86Operand;

/// X86ATTAsmParser - Target assembly parser for X86.  It parses operands
/// written as "%reg", "$imm" and "segment:disp(base, index, scale)", applies
/// a number of mnemonic/operand canonicalization hacks, and hands the result
/// to the tablegen-generated matcher.
class X86ATTAsmParser : public TargetAsmParser {
  MCSubtargetInfo &STI;
  MCAsmParser &Parser;

private:
  MCAsmParser &getParser() const { return Parser; }

  MCAsmLexer &getLexer() const { return Parser.getLexer(); }

  /// Error - Forward a diagnostic to the generic parser and return its result.
  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }

  /// ParseOperand - Parse one operand at the current token.  Returns a newly
  /// allocated operand, or null after an error has been reported.
  X86Operand *ParseOperand();
  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);

  bool ParseDirectiveWord(unsigned Size, SMLoc L);

  bool MatchAndEmitInstruction(SMLoc IDLoc,
                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                               MCStreamer &Out);

  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
  /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.  The displacement
  /// must be the constant 0 and there must be no index register.
  bool isSrcOp(X86Operand &Op);

  /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
  /// or %es:(%edi) in 32bit mode.  The displacement must be the constant 0
  /// and there must be no index register.
  bool isDstOp(X86Operand &Op);

  /// is64Bit - True when the subtarget feature bits include X86::Mode64Bit.
  bool is64Bit() {
    // FIXME: Can tablegen auto-generate this?
    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
    : TargetAsmParser(), STI(sti), Parser(parser) {

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
  }

  /// ParseRegister - Parse a '%'-prefixed register name.  Returns true on
  /// error (after reporting it) and false on success, in which case RegNo,
  /// StartLoc and EndLoc are filled in.
  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);

  /// ParseInstruction - Parse the operands following the mnemonic Name and
  /// append them to Operands (element 0 is the mnemonic token).  Returns true
  /// on error.
  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  virtual bool ParseDirective(AsmToken DirectiveID);
};
} // end anonymous namespace

/// @name Auto-generated Match Functions
/// {

static unsigned MatchRegisterName(StringRef Name);

/// }

namespace {

/// X86Operand - Instances of this class represent a parsed operand of an X86
/// machine instruction.  The mnemonic itself is also stored as a Token
/// operand.  Kind selects which member of the anonymous union is valid.
struct X86Operand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Register,
    Immediate,
    Memory
  } Kind;

  SMLoc StartLoc, EndLoc;

  union {
    // Valid when Kind == Token.  The characters are not owned by the operand.
    struct {
      const char *Data;
      unsigned Length;
    } Tok;

    // Valid when Kind == Register.
    struct {
      unsigned RegNo;
    } Reg;

    // Valid when Kind == Immediate.
    struct {
      const MCExpr *Val;
    } Imm;

    // Valid when Kind == Memory.  A register field of 0 means "not present".
    struct {
      unsigned SegReg;
      const MCExpr *Disp;
      unsigned BaseReg;
      unsigned IndexReg;
      unsigned Scale;
    } Mem;
  };

  /// Construct an operand of the given kind; the union payload is left for
  /// the Create* factory functions to fill in.
  X86Operand(KindTy K, SMLoc Start, SMLoc End)
    : Kind(K), StartLoc(Start), EndLoc(End) {}

  /// getStartLoc - Get the location of the first token of this operand.
  SMLoc getStartLoc() const { return StartLoc; }
  /// getEndLoc - Get the location of the last token of this operand.
  SMLoc getEndLoc() const { return EndLoc; }

  /// dump - Debug printing hook; this implementation prints nothing.
  virtual void dump(raw_ostream &OS) const {}

  StringRef getToken() const {
    assert(Kind == Token && "Invalid access!");
    return StringRef(Tok.Data, Tok.Length);
  }
  /// setTokenValue - Repoint the token at Value.  Only the pointer and length
  /// are stored, so Value's storage must outlive every later getToken() use.
  void setTokenValue(StringRef Value) {
    assert(Kind == Token && "Invalid access!");
    Tok.Data = Value.data();
    Tok.Length = Value.size();
  }

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return Reg.RegNo;
  }

  const MCExpr *getImm() const {
    assert(Kind == Immediate && "Invalid access!");
    return Imm.Val;
  }

  const MCExpr *getMemDisp() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.Disp;
  }
  unsigned getMemSegReg() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.SegReg;
  }
  unsigned getMemBaseReg() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.BaseReg;
  }
  unsigned getMemIndexReg() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.IndexReg;
  }
  unsigned getMemScale() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.Scale;
  }

  bool isToken() const {return Kind == Token; }

  bool isImm() const { return Kind == Immediate; }

  /// isImmSExti16i8 - True for an immediate that is either non-constant or a
  /// constant in one of the ranges [0, 0x7F], [0xFF80, 0xFFFF] or
  /// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] (value viewed as unsigned 64-bit).
  bool isImmSExti16i8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    uint64_t Value = CE->getValue();
    return ((                                  Value <= 0x000000000000007FULL)||
            (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
  }
  /// isImmSExti32i8 - As isImmSExti16i8, but the middle accepted range is
  /// [0xFFFFFF80, 0xFFFFFFFF].
  bool isImmSExti32i8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    uint64_t Value = CE->getValue();
    return ((                                  Value <= 0x000000000000007FULL)||
            (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
  }
  /// isImmSExti64i8 - True for a non-constant immediate, or a constant in
  /// [0, 0x7F] or [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF].
  bool isImmSExti64i8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    uint64_t Value = CE->getValue();
    return ((                                  Value <= 0x000000000000007FULL)||
            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
  }
  /// isImmSExti64i32 - True for a non-constant immediate, or a constant in
  /// [0, 0x7FFFFFFF] or [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF].
  bool isImmSExti64i32() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    uint64_t Value = CE->getValue();
    return ((                                  Value <= 0x000000007FFFFFFFULL)||
            (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
  }

  bool isMem() const { return Kind == Memory; }

  /// isAbsMem - True for a memory operand that is only a displacement: no
  /// segment, base or index register, and a scale of 1.
  bool isAbsMem() const {
    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
      !getMemIndexReg() && getMemScale() == 1;
  }

  bool isReg() const { return Kind == Register; }

  /// addExpr - Append Expr to Inst, as an immediate operand when it is a
  /// constant expression and as an expression operand otherwise.
  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
    // Add as immediates when possible.
    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
    else
      Inst.addOperand(MCOperand::CreateExpr(Expr));
  }

  void addRegOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(getReg()));
  }

  void addImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    addExpr(Inst, getImm());
  }

  /// addMemOperands - Append the five MCInst operands of a memory reference,
  /// in the order: base register, scale, index register, displacement,
  /// segment register.
  void addMemOperands(MCInst &Inst, unsigned N) const {
    assert((N == 5) && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
    addExpr(Inst, getMemDisp());
    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
  }

  /// addAbsMemOperands - Append only the displacement.  Unlike the other
  /// add*Operands helpers this always adds an expression operand, even when
  /// the displacement is a constant.
  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
    assert((N == 1) && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
  }

  /// Create a token operand referring to (not copying) the characters of Str.
  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
    X86Operand *Res = new X86Operand(Token, Loc, Loc);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    return Res;
  }

  /// Create a register operand.
  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
    Res->Reg.RegNo = RegNo;
    return Res;
  }

  /// Create an immediate operand.
  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
    Res->Imm.Val = Val;
    return Res;
  }

  /// Create an absolute memory operand.
  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
                               SMLoc EndLoc) {
    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
    Res->Mem.SegReg   = 0;
    Res->Mem.Disp     = Disp;
    Res->Mem.BaseReg  = 0;
    Res->Mem.IndexReg = 0;
    Res->Mem.Scale    = 1;
    return Res;
  }

  /// Create a generalized memory operand.
  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
    // We should never just have a displacement, that should be parsed as an
    // absolute memory operand.
    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");

    // The scale should always be one of {1,2,4,8}.
    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
           "Invalid scale!");
    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
    Res->Mem.SegReg   = SegReg;
    Res->Mem.Disp     = Disp;
    Res->Mem.BaseReg  = BaseReg;
    Res->Mem.IndexReg = IndexReg;
    Res->Mem.Scale    = Scale;
    return Res;
  }
};

} // end anonymous namespace.

/// isSrcOp - True if Op is a memory operand with base register RSI (64-bit
/// mode) or ESI (otherwise), segment register either absent or DS, a constant
/// zero displacement and no index register.
bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
  unsigned basereg = is64Bit() ? X86::RSI : X86::ESI;

  return (Op.isMem() &&
          (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
          isa<MCConstantExpr>(Op.Mem.Disp) &&
          cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
          Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
}

/// isDstOp - True if Op is a memory operand with base register RDI (64-bit
/// mode) or EDI (otherwise), segment register ES (required, unlike isSrcOp),
/// a constant zero displacement and no index register.
bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
  unsigned basereg = is64Bit() ? X86::RDI : X86::EDI;

  return Op.isMem() && Op.Mem.SegReg == X86::ES &&
         isa<MCConstantExpr>(Op.Mem.Disp) &&
         cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
         Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
}

/// ParseRegister - Parse "%name".  The current token must be the '%'.  Besides
/// names known to MatchRegisterName (tried as written, then lowercased), this
/// accepts "%st" / "%st(N)" for N in 0..7 and "db0".."db7" as aliases for
/// DR0..DR7.  Returns true after reporting an error, false on success.
bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
                                    SMLoc &StartLoc, SMLoc &EndLoc) {
  RegNo = 0;
  const AsmToken &TokPercent = Parser.getTok();
  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
  StartLoc = TokPercent.getLoc();
  Parser.Lex(); // Eat percent token.

  const AsmToken &Tok = Parser.getTok();
  if (Tok.isNot(AsmToken::Identifier))
    return Error(Tok.getLoc(), "invalid register name");

  // FIXME: Validate register for the current architecture; we have to do
  // validation later, so maybe there is no need for this here.
  RegNo = MatchRegisterName(Tok.getString());

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(LowercaseString(Tok.getString()));

  // FIXME: This should be done using Requires<In32BitMode> and
  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
  // can be also checked.
  if (RegNo == X86::RIZ && !is64Bit())
    return Error(Tok.getLoc(), "riz register in 64-bit mode only");

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
    // A bare "%st" means %st(0).
    RegNo = X86::ST0;
    EndLoc = Tok.getLoc();
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (getLexer().isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    getParser().Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer))
      return Error(IntTok.getLoc(), "expected stack index");
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default: return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex() both consumes the integer and returns the following token.
    if (getParser().Lex().isNot(AsmToken::RParen))
      return Error(Parser.getTok().getLoc(), "expected ')'");

    EndLoc = Tok.getLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  // If this is "db[0-7]", match it as an alias
  // for dr[0-7].
  if (RegNo == 0 && Tok.getString().size() == 3 &&
      Tok.getString().startswith("db")) {
    // Any other third character leaves RegNo at 0 and falls through to the
    // generic "invalid register name" error below.
    switch (Tok.getString()[2]) {
    case '0': RegNo = X86::DR0; break;
    case '1': RegNo = X86::DR1; break;
    case '2': RegNo = X86::DR2; break;
    case '3': RegNo = X86::DR3; break;
    case '4': RegNo = X86::DR4; break;
    case '5': RegNo = X86::DR5; break;
    case '6': RegNo = X86::DR6; break;
    case '7': RegNo = X86::DR7; break;
    }

    if (RegNo != 0) {
      EndLoc = Tok.getLoc();
      Parser.Lex(); // Eat it.
      return false;
    }
  }

  if (RegNo == 0)
    return Error(Tok.getLoc(), "invalid register name");

  EndLoc = Tok.getLoc();
  Parser.Lex(); // Eat identifier token.
  return false;
}

/// ParseOperand - Parse a single operand, dispatching on the current token:
/// '%' starts a register (or a "%seg:" prefixed memory reference), '$' starts
/// an immediate, and anything else is parsed as a memory operand with no
/// segment register.  Returns null if an error was reported.
X86Operand *X86ATTAsmParser::ParseOperand() {
  switch (getLexer().getKind()) {
  default:
    // Parse a memory operand with no segment register.
    return ParseMemOperand(0, Parser.getTok().getLoc());
  case AsmToken::Percent: {
    // Read the register.
    unsigned RegNo;
    SMLoc Start, End;
    if (ParseRegister(RegNo, Start, End)) return 0;
    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
      Error(Start, "eiz and riz can only be used as index registers");
      return 0;
    }

    // If this is a segment register followed by a ':', then this is the start
    // of a memory reference, otherwise this is a normal register reference.
    if (getLexer().isNot(AsmToken::Colon))
      return X86Operand::CreateReg(RegNo, Start, End);


    getParser().Lex(); // Eat the colon.
    return ParseMemOperand(RegNo, Start);
  }
  case AsmToken::Dollar: {
    // $42 -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    if (getParser().ParseExpression(Val, End))
      return 0;
    return X86Operand::CreateImm(Val, Start, End);
  }
  }
}

/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
/// has already been parsed if present.  SegReg is that segment register (0 if
/// none) and MemStart is the location where the operand began.  Returns null
/// if an error was reported.
X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {

  // We have to disambiguate a parenthesized expression "(4+5)" from the start
  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
  // only way to do this without lookahead is to eat the '(' and see what is
  // after it.
  // The displacement defaults to the constant 0 when none is written.
  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
  if (getLexer().isNot(AsmToken::LParen)) {
    SMLoc ExprEnd;
    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;

    // After parsing the base expression we could either have a parenthesized
    // memory address or not.  If not, return now.  If so, eat the (.
    if (getLexer().isNot(AsmToken::LParen)) {
      // Without a segment register this is an absolute memory reference;
      // with one, it is a segment-relative displacement.
      if (SegReg == 0)
        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
    }

    // Eat the '('.
    Parser.Lex();
  } else {
    // Okay, we have a '('.  We don't know whether this is an expression or
    // not, so we have to eat the ( to see beyond it.
    SMLoc LParenLoc = Parser.getTok().getLoc();
    Parser.Lex(); // Eat the '('.

    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
      // Nothing to do here, fall into the code below with the '(' part of the
      // memory operand consumed.
    } else {
      SMLoc ExprEnd;

      // It must be a parenthesized expression, parse it now.
      if (getParser().ParseParenExpression(Disp, ExprEnd))
        return 0;

      // After parsing the base expression we could either have a parenthesized
      // memory address or not.  If not, return now.  If so, eat the (.
      if (getLexer().isNot(AsmToken::LParen)) {
        // Without a segment register this is an absolute memory reference;
        // with one, it is a segment-relative displacement.
        if (SegReg == 0)
          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
      }

      // Eat the '('.
      Parser.Lex();
    }
  }

  // If we reached here, then we just ate the ( of the memory operand.  Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;

  if (getLexer().is(AsmToken::Percent)) {
    // The same variable receives both the start and the end location, so it
    // holds whichever one ParseRegister stored last.
    SMLoc L;
    if (ParseRegister(BaseReg, L, L)) return 0;
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
      Error(L, "eiz and riz can only be used as index registers");
      return 0;
    }
  }

  if (getLexer().is(AsmToken::Comma)) {
    Parser.Lex(); // Eat the comma.

    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Note that even though it would be completely consistent to support
    // syntax like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for
    // this.
    if (getLexer().is(AsmToken::Percent)) {
      SMLoc L;
      if (ParseRegister(IndexReg, L, L)) return 0;

      if (getLexer().isNot(AsmToken::RParen)) {
        // Parse the scale amount:
        //  ::= ',' [scale-expression]
        if (getLexer().isNot(AsmToken::Comma)) {
          Error(Parser.getTok().getLoc(),
                "expected comma in scale expression");
          return 0;
        }
        Parser.Lex(); // Eat the comma.

        // An empty scale ("(%eax,%ebx,)") keeps the default of 1.
        if (getLexer().isNot(AsmToken::RParen)) {
          SMLoc Loc = Parser.getTok().getLoc();

          int64_t ScaleVal;
          if (getParser().ParseAbsoluteExpression(ScaleVal))
            return 0;

          // Validate the scale amount.
          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
            return 0;
          }
          Scale = (unsigned)ScaleVal;
        }
      }
    } else if (getLexer().isNot(AsmToken::RParen)) {
      // A scale amount without an index is ignored.
      SMLoc Loc = Parser.getTok().getLoc();

      int64_t Value;
      if (getParser().ParseAbsoluteExpression(Value))
        return 0;

      if (Value != 1)
        Warning(Loc, "scale factor without index register is ignored");
      Scale = 1;
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (getLexer().isNot(AsmToken::RParen)) {
    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
    return 0;
  }
  SMLoc MemEnd = Parser.getTok().getLoc();
  Parser.Lex(); // Eat the ')'.

  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
                               MemStart, MemEnd);
}

/// ParseInstruction - Build the operand list for one instruction.  Operands[0]
/// becomes the (possibly rewritten) mnemonic token; any operands written after
/// the mnemonic follow.  After parsing, several textual forms are rewritten in
/// place into the form the matcher expects (see the individual hacks below).
/// Returns true on error, false on success.
bool X86ATTAsmParser::
ParseInstruction(StringRef Name, SMLoc NameLoc,
                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
  StringRef PatchedName = Name;

  // FIXME: Hack to recognize setneb as setne.
  // "setb" and "setnb" are excluded because there the 'b' is not a suffix.
  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
  // The comparison code embedded in the mnemonic is turned into an extra
  // leading immediate operand and the mnemonic is reduced to the plain
  // [v]cmp{ss,sd,ps,pd} form.
  const MCExpr *ExtraImmOp = 0;
  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
    bool IsVCMP = PatchedName.startswith("vcmp");
    // Index of the first character of the comparison code.
    unsigned SSECCIdx = IsVCMP ? 4 : 3;
    unsigned SSEComparisonCode = StringSwitch<unsigned>(
      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
      .Case("eq",          0)
      .Case("lt",          1)
      .Case("le",          2)
      .Case("unord",       3)
      .Case("neq",         4)
      .Case("nlt",         5)
      .Case("nle",         6)
      .Case("ord",         7)
      .Case("eq_uq",       8)
      .Case("nge",         9)
      .Case("ngt",      0x0A)
      .Case("false",    0x0B)
      .Case("neq_oq",   0x0C)
      .Case("ge",       0x0D)
      .Case("gt",       0x0E)
      .Case("true",     0x0F)
      .Case("eq_os",    0x10)
      .Case("lt_oq",    0x11)
      .Case("le_oq",    0x12)
      .Case("unord_s",  0x13)
      .Case("neq_us",   0x14)
      .Case("nlt_uq",   0x15)
      .Case("nle_uq",   0x16)
      .Case("ord_s",    0x17)
      .Case("eq_us",    0x18)
      .Case("nge_uq",   0x19)
      .Case("ngt_uq",   0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os",   0x1C)
      .Case("ge_oq",    0x1D)
      .Case("gt_oq",    0x1E)
      .Case("true_us",  0x1F)
      .Default(~0U);
    // ~0U means the text between prefix and suffix was not a known
    // comparison code; the mnemonic is then left untouched.
    if (SSEComparisonCode != ~0U) {
      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
                                          getParser().getContext());
      if (PatchedName.endswith("ss")) {
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      } else if (PatchedName.endswith("sd")) {
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      } else if (PatchedName.endswith("ps")) {
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      } else {
        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      }
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  if (ExtraImmOp)
    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));


  // Determine whether this is an instruction prefix.
  bool isPrefix =
    Name == "lock" || Name == "rep" ||
    Name == "repe" || Name == "repz" ||
    Name == "repne" || Name == "repnz" ||
    Name == "rex64" || Name == "data16";


  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {

    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star)) {
      SMLoc Loc = Parser.getTok().getLoc();
      Operands.push_back(X86Operand::CreateToken("*", Loc));
      Parser.Lex(); // Eat the star.
    }

    // Read the first operand.
    if (X86Operand *Op = ParseOperand())
      Operands.push_back(Op);
    else {
      Parser.EatToEndOfStatement();
      return true;
    }

    while (getLexer().is(AsmToken::Comma)) {
      Parser.Lex();  // Eat the comma.

      // Parse and remember the operand.
      if (X86Operand *Op = ParseOperand())
        Operands.push_back(Op);
      else {
        Parser.EatToEndOfStatement();
        return true;
      }
    }

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      SMLoc Loc = getLexer().getLoc();
      Parser.EatToEndOfStatement();
      return Error(Loc, "unexpected token in argument list");
    }
  }

  if (getLexer().is(AsmToken::EndOfStatement))
    Parser.Lex(); // Consume the EndOfStatement
  else if (isPrefix && getLexer().is(AsmToken::Slash))
    Parser.Lex(); // Consume the prefix separator Slash

  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
      Operands.size() == 3) {
    X86Operand &Op = *(X86Operand*)Operands.back();
    if (Op.isMem() && Op.Mem.SegReg == 0 &&
        isa<MCConstantExpr>(Op.Mem.Disp) &&
        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
      // Replace the memory operand with a plain register operand for its
      // base register, then free the old operand.
      SMLoc Loc = Op.getEndLoc();
      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
      delete &Op;
    }
  }
  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
      Operands.size() == 3) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    if (Op.isMem() && Op.Mem.SegReg == 0 &&
        isa<MCConstantExpr>(Op.Mem.Disp) &&
        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
      SMLoc Loc = Op.getEndLoc();
      Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
      delete &Op;
    }
  }
  // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
  if (Name.startswith("ins") && Operands.size() == 3 &&
      (Name == "insb" || Name == "insw" || Name == "insl")) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
    if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
      // Drop both explicit operands, leaving only the mnemonic.
      Operands.pop_back();
      Operands.pop_back();
      delete &Op;
      delete &Op2;
    }
  }

  // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
  if (Name.startswith("outs") && Operands.size() == 3 &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
    if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
      Operands.pop_back();
      Operands.pop_back();
      delete &Op;
      delete &Op2;
    }
  }

  // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
  if (Name.startswith("movs") && Operands.size() == 3 &&
      (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
       (is64Bit() && Name == "movsq"))) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
    if (isSrcOp(Op) && isDstOp(Op2)) {
      Operands.pop_back();
      Operands.pop_back();
      delete &Op;
      delete &Op2;
    }
  }
  // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
  if (Name.startswith("lods") && Operands.size() == 3 &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || (is64Bit() && Name == "lodsq"))) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
    if (isSrcOp(*Op1) && Op2->isReg()) {
      // Pick the suffixed mnemonic implied by the accumulator register.  A
      // bare "lods" accepts any of them; a suffixed mnemonic must agree with
      // the register, otherwise the operands are left alone (ins == NULL).
      const char *ins;
      unsigned reg = Op2->getReg();
      bool isLods = Name == "lods";
      if (reg == X86::AL && (isLods || Name == "lodsb"))
        ins = "lodsb";
      else if (reg == X86::AX && (isLods || Name == "lodsw"))
        ins = "lodsw";
      else if (reg == X86::EAX && (isLods || Name == "lodsl"))
        ins = "lodsl";
      else if (reg == X86::RAX && (isLods || Name == "lodsq"))
        ins = "lodsq";
      else
        ins = NULL;
      if (ins != NULL) {
        Operands.pop_back();
        Operands.pop_back();
        delete Op1;
        delete Op2;
        if (Name != ins)
          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
      }
    }
  }
  // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
  if (Name.startswith("stos") && Operands.size() == 3 &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || (is64Bit() && Name == "stosq"))) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
    if (isDstOp(*Op2) && Op1->isReg()) {
      // Same scheme as for lods above, with the register as first operand.
      const char *ins;
      unsigned reg = Op1->getReg();
      bool isStos = Name == "stos";
      if (reg == X86::AL && (isStos || Name == "stosb"))
        ins = "stosb";
      else if (reg == X86::AX && (isStos || Name == "stosw"))
        ins = "stosw";
      else if (reg == X86::EAX && (isStos || Name == "stosl"))
        ins = "stosl";
      else if (reg == X86::RAX && (isStos || Name == "stosq"))
        ins = "stosq";
      else
        ins = NULL;
      if (ins != NULL) {
        Operands.pop_back();
        Operands.pop_back();
        delete Op1;
        delete Op2;
        if (Name != ins)
          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
      }
    }
  }

  // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
  // "shift <op>".  The same is done for sal, rcl, rcr, rol and ror.
  if ((Name.startswith("shr") || Name.startswith("sar") ||
       Name.startswith("shl") || Name.startswith("sal") ||
       Name.startswith("rcl") || Name.startswith("rcr") ||
       Name.startswith("rol") || Name.startswith("ror")) &&
      Operands.size() == 3) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
        cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
      delete Operands[1];
      Operands.erase(Operands.begin() + 1);
    }
  }

  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
  // instalias with an immediate operand yet.
  if (Name == "int" && Operands.size() == 2) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
        cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
      delete Operands[1];
      Operands.erase(Operands.begin() + 1);
      static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
    }
  }

  return false;
}

bool X86ATTAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                        MCStreamer &Out) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
  assert(Op->isToken() && "Leading operand should always be a mnemonic!");

  // First, handle aliases that expand to multiple instructions.
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
  // call.
  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Out.EmitInstruction(Inst);

    const char *Repl =
      StringSwitch<const char*>(Op->getToken())
        .Case("finit",  "fninit")
        .Case("fsave",  "fnsave")
        .Case("fstcw",  "fnstcw")
        .Case("fstcww",  "fnstcw")
        .Case("fstenv", "fnstenv")
        .Case("fstsw",  "fnstsw")
        .Case("fstsww", "fnstsw")
        .Case("fclex",  "fnclex")
        .Default(0);
    assert(Repl && "Unknown wait-prefixed instruction");
    delete Operands[0];
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }

  bool WasOriginallyInvalidOperand = false;
  unsigned OrigErrorInfo;
  MCInst Inst;

  // First, try a direct match.
961 switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { 962 case Match_Success: 963 Out.EmitInstruction(Inst); 964 return false; 965 case Match_MissingFeature: 966 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 967 return true; 968 case Match_ConversionFail: 969 return Error(IDLoc, "unable to convert operands to instruction"); 970 case Match_InvalidOperand: 971 WasOriginallyInvalidOperand = true; 972 break; 973 case Match_MnemonicFail: 974 break; 975 } 976 977 // FIXME: Ideally, we would only attempt suffix matches for things which are 978 // valid prefixes, and we could just infer the right unambiguous 979 // type. However, that requires substantially more matcher support than the 980 // following hack. 981 982 // Change the operand to point to a temporary token. 983 StringRef Base = Op->getToken(); 984 SmallString<16> Tmp; 985 Tmp += Base; 986 Tmp += ' '; 987 Op->setTokenValue(Tmp.str()); 988 989 // If this instruction starts with an 'f', then it is a floating point stack 990 // instruction. These come in up to three forms for 32-bit, 64-bit, and 991 // 80-bit floating point, which use the suffixes s,l,t respectively. 992 // 993 // Otherwise, we assume that this may be an integer instruction, which comes 994 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 995 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 996 997 // Check for the various suffix matches. 998 Tmp[Base.size()] = Suffixes[0]; 999 unsigned ErrorInfoIgnore; 1000 MatchResultTy Match1, Match2, Match3, Match4; 1001 1002 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1003 Tmp[Base.size()] = Suffixes[1]; 1004 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1005 Tmp[Base.size()] = Suffixes[2]; 1006 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1007 Tmp[Base.size()] = Suffixes[3]; 1008 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1009 1010 // Restore the old token. 
1011 Op->setTokenValue(Base); 1012 1013 // If exactly one matched, then we treat that as a successful match (and the 1014 // instruction will already have been filled in correctly, since the failing 1015 // matches won't have modified it). 1016 unsigned NumSuccessfulMatches = 1017 (Match1 == Match_Success) + (Match2 == Match_Success) + 1018 (Match3 == Match_Success) + (Match4 == Match_Success); 1019 if (NumSuccessfulMatches == 1) { 1020 Out.EmitInstruction(Inst); 1021 return false; 1022 } 1023 1024 // Otherwise, the match failed, try to produce a decent error message. 1025 1026 // If we had multiple suffix matches, then identify this as an ambiguous 1027 // match. 1028 if (NumSuccessfulMatches > 1) { 1029 char MatchChars[4]; 1030 unsigned NumMatches = 0; 1031 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; 1032 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; 1033 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; 1034 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; 1035 1036 SmallString<126> Msg; 1037 raw_svector_ostream OS(Msg); 1038 OS << "ambiguous instructions require an explicit suffix (could be "; 1039 for (unsigned i = 0; i != NumMatches; ++i) { 1040 if (i != 0) 1041 OS << ", "; 1042 if (i + 1 == NumMatches) 1043 OS << "or "; 1044 OS << "'" << Base << MatchChars[i] << "'"; 1045 } 1046 OS << ")"; 1047 Error(IDLoc, OS.str()); 1048 return true; 1049 } 1050 1051 // Okay, we know that none of the variants matched successfully. 1052 1053 // If all of the instructions reported an invalid mnemonic, then the original 1054 // mnemonic was invalid. 
1055 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && 1056 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { 1057 if (!WasOriginallyInvalidOperand) { 1058 Error(IDLoc, "invalid instruction mnemonic '" + Base + "'"); 1059 return true; 1060 } 1061 1062 // Recover location info for the operand if we know which was the problem. 1063 SMLoc ErrorLoc = IDLoc; 1064 if (OrigErrorInfo != ~0U) { 1065 if (OrigErrorInfo >= Operands.size()) 1066 return Error(IDLoc, "too few operands for instruction"); 1067 1068 ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc(); 1069 if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; 1070 } 1071 1072 return Error(ErrorLoc, "invalid operand for instruction"); 1073 } 1074 1075 // If one instruction matched with a missing feature, report this as a 1076 // missing feature. 1077 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + 1078 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ 1079 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1080 return true; 1081 } 1082 1083 // If one instruction matched with an invalid operand, report this as an 1084 // operand failure. 1085 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + 1086 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ 1087 Error(IDLoc, "invalid operand for instruction"); 1088 return true; 1089 } 1090 1091 // If all of these were an outright failure, report it in a useless way. 1092 // FIXME: We should give nicer diagnostics about the exact failure. 
1093 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); 1094 return true; 1095} 1096 1097 1098bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { 1099 StringRef IDVal = DirectiveID.getIdentifier(); 1100 if (IDVal == ".word") 1101 return ParseDirectiveWord(2, DirectiveID.getLoc()); 1102 return true; 1103} 1104 1105/// ParseDirectiveWord 1106/// ::= .word [ expression (, expression)* ] 1107bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 1108 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1109 for (;;) { 1110 const MCExpr *Value; 1111 if (getParser().ParseExpression(Value)) 1112 return true; 1113 1114 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 1115 1116 if (getLexer().is(AsmToken::EndOfStatement)) 1117 break; 1118 1119 // FIXME: Improve diagnostic. 1120 if (getLexer().isNot(AsmToken::Comma)) 1121 return Error(L, "unexpected token in directive"); 1122 Parser.Lex(); 1123 } 1124 } 1125 1126 Parser.Lex(); 1127 return false; 1128} 1129 1130 1131 1132 1133extern "C" void LLVMInitializeX86AsmLexer(); 1134 1135// Force static initialization. 1136extern "C" void LLVMInitializeX86AsmParser() { 1137 RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); 1138 RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); 1139 LLVMInitializeX86AsmLexer(); 1140} 1141 1142#define GET_REGISTER_MATCHER 1143#define GET_MATCHER_IMPLEMENTATION 1144#include "X86GenAsmMatcher.inc" 1145