X86AsmParser.cpp revision 3b96e1fe3b695e6d845668ea90d75016f0f46a17
19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall// 39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall// The LLVM Compiler Infrastructure 49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall// 59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall// This file is distributed under the University of Illinois Open Source 69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall// License. See LICENSE.TXT for details. 79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall// 89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall//===----------------------------------------------------------------------===// 99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "MCTargetDesc/X86BaseInfo.h" 119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCTargetAsmParser.h" 129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCStreamer.h" 139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCExpr.h" 149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCInst.h" 159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCRegisterInfo.h" 169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCSubtargetInfo.h" 179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCParser/MCAsmLexer.h" 189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCParser/MCAsmParser.h" 199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/ADT/OwningPtr.h" 219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/ADT/SmallString.h" 229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/ADT/SmallVector.h" 239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/ADT/StringSwitch.h" 249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/ADT/Twine.h" 259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/Support/SourceMgr.h" 269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/Support/TargetRegistry.h" 279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "llvm/Support/raw_ostream.h" 289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallusing namespace llvm; 309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallnamespace { 329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstruct X86Operand; 339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallclass X86AsmParser : public MCTargetAsmParser { 359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MCSubtargetInfo &STI; 369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MCAsmParser &Parser; 379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallprivate: 399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MCAsmParser &getParser() const { return Parser; } 409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MCAsmLexer &getLexer() const { return Parser.getLexer(); } 429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool Error(SMLoc L, const Twine &Msg, 449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { 459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Parser.Error(L, Msg, Ranges); 469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { 499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Error(Loc, Msg); 509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return 0; 519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ParseOperand(); 549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ParseATTOperand(); 559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ParseIntelOperand(); 569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ParseIntelMemOperand(); 579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); 589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool ParseDirectiveWord(unsigned Size, SMLoc L); 619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool processInstruction(MCInst &Inst, 649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const SmallVectorImpl<MCParsedAsmOperand*> &Ops); 659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool MatchAndEmitInstruction(SMLoc IDLoc, 679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SmallVectorImpl<MCParsedAsmOperand*> &Operands, 689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MCStreamer &Out); 699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) 719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. 729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool isSrcOp(X86Operand &Op); 739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode 759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// or %es:(%edi) in 32bit mode. 769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool isDstOp(X86Operand &Op); 779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bool is64BitMode() const { 799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall // FIXME: Can tablegen auto-generate this? 809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return (STI.getFeatureBits() & X86::Mode64Bit) != 0; 819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall void SwitchMode() { 839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit)); 849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall setAvailableFeatures(FB); 859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// @name Auto-generated Matcher Functions 889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// { 899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define GET_ASSEMBLER_HEADER 919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "X86GenAsmMatcher.inc" 929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// } 949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallpublic: 969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) 979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : MCTargetAsmParser(), STI(sti), Parser(parser) { 989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall // Initialize the set of available features. 1009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 1019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 1029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 1039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, 1059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SmallVectorImpl<MCParsedAsmOperand*> &Operands); 1069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall virtual bool ParseDirective(AsmToken DirectiveID); 1089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}; 1099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} // end anonymous namespace 1109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/// @name Auto-generated Match Functions 1129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/// { 1139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic unsigned MatchRegisterName(StringRef Name); 1159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/// } 1179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic bool isImmSExti16i8Value(uint64_t Value) { 1199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return (( Value <= 0x000000000000007FULL)|| 1209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| 1219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 1229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic bool isImmSExti32i8Value(uint64_t Value) { 1259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return (( Value <= 0x000000000000007FULL)|| 1269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| 1279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 1289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic bool isImmZExtu32u8Value(uint64_t Value) { 1319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return (Value <= 0x00000000000000FFULL); 1329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic bool isImmSExti64i8Value(uint64_t Value) { 1359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return (( Value <= 0x000000000000007FULL)|| 1369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 1379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic bool isImmSExti64i32Value(uint64_t Value) { 1409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return (( Value <= 0x000000007FFFFFFFULL)|| 1419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 1429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallnamespace { 1449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/// X86Operand - Instances of this class represent a parsed X86 machine 1469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/// instruction. 1479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstruct X86Operand : public MCParsedAsmOperand { 1489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall enum KindTy { 1499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Token, 1509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Register, 1519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Immediate, 1529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Memory 1539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } Kind; 1549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SMLoc StartLoc, EndLoc; 1569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall union { 1589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall struct { 1599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const char *Data; 1609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned Length; 1619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } Tok; 1629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall struct { 1649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned RegNo; 1659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } Reg; 1669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall struct { 1689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const MCExpr *Val; 1699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } Imm; 1709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall struct { 1729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned SegReg; 1739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const MCExpr *Disp; 1749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned BaseReg; 1759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned IndexReg; 1769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned Scale; 1779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned Size; 1789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } Mem; 1799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }; 1809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall X86Operand(KindTy K, SMLoc Start, SMLoc End) 1829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : Kind(K), StartLoc(Start), EndLoc(End) {} 1839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// getStartLoc - Get the location of the first token of this operand. 1859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SMLoc getStartLoc() const { return StartLoc; } 1869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /// getEndLoc - Get the location of the last token of this operand. 1879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SMLoc getEndLoc() const { return EndLoc; } 1889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } 1909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall virtual void print(raw_ostream &OS) const {} 1929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall StringRef getToken() const { 1949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Token && "Invalid access!"); 1959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return StringRef(Tok.Data, Tok.Length); 1969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 1979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall void setTokenValue(StringRef Value) { 1989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Token && "Invalid access!"); 1999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Tok.Data = Value.data(); 2009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Tok.Length = Value.size(); 2019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned getReg() const { 2049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Register && "Invalid access!"); 2059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Reg.RegNo; 2069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const MCExpr *getImm() const { 2099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Immediate && "Invalid access!"); 2109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Imm.Val; 2119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const MCExpr *getMemDisp() const { 2149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Memory && "Invalid access!"); 2159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Mem.Disp; 2169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned getMemSegReg() const { 2189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Memory && "Invalid access!"); 2199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Mem.SegReg; 2209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned getMemBaseReg() const { 2229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Memory && "Invalid access!"); 2239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Mem.BaseReg; 2249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned getMemIndexReg() const { 2269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall assert(Kind == Memory && "Invalid access!"); 2279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Mem.IndexReg; 228 } 229 unsigned getMemScale() const { 230 assert(Kind == Memory && "Invalid access!"); 231 return Mem.Scale; 232 } 233 234 bool isToken() const {return Kind == Token; } 235 236 bool isImm() const { return Kind == Immediate; } 237 238 bool isImmSExti16i8() const { 239 if (!isImm()) 240 return false; 241 242 // If this isn't a constant expr, just assume it fits and let relaxation 243 // handle it. 244 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 245 if (!CE) 246 return true; 247 248 // Otherwise, check the value is in a range that makes sense for this 249 // extension. 250 return isImmSExti16i8Value(CE->getValue()); 251 } 252 bool isImmSExti32i8() const { 253 if (!isImm()) 254 return false; 255 256 // If this isn't a constant expr, just assume it fits and let relaxation 257 // handle it. 258 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 259 if (!CE) 260 return true; 261 262 // Otherwise, check the value is in a range that makes sense for this 263 // extension. 264 return isImmSExti32i8Value(CE->getValue()); 265 } 266 bool isImmZExtu32u8() const { 267 if (!isImm()) 268 return false; 269 270 // If this isn't a constant expr, just assume it fits and let relaxation 271 // handle it. 272 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 273 if (!CE) 274 return true; 275 276 // Otherwise, check the value is in a range that makes sense for this 277 // extension. 278 return isImmZExtu32u8Value(CE->getValue()); 279 } 280 bool isImmSExti64i8() const { 281 if (!isImm()) 282 return false; 283 284 // If this isn't a constant expr, just assume it fits and let relaxation 285 // handle it. 286 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 287 if (!CE) 288 return true; 289 290 // Otherwise, check the value is in a range that makes sense for this 291 // extension. 292 return isImmSExti64i8Value(CE->getValue()); 293 } 294 bool isImmSExti64i32() const { 295 if (!isImm()) 296 return false; 297 298 // If this isn't a constant expr, just assume it fits and let relaxation 299 // handle it. 300 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 301 if (!CE) 302 return true; 303 304 // Otherwise, check the value is in a range that makes sense for this 305 // extension. 306 return isImmSExti64i32Value(CE->getValue()); 307 } 308 309 bool isMem() const { return Kind == Memory; } 310 bool isMem8() const { 311 return Kind == Memory && (!Mem.Size || Mem.Size == 8); 312 } 313 bool isMem16() const { 314 return Kind == Memory && (!Mem.Size || Mem.Size == 16); 315 } 316 bool isMem32() const { 317 return Kind == Memory && (!Mem.Size || Mem.Size == 32); 318 } 319 bool isMem64() const { 320 return Kind == Memory && (!Mem.Size || Mem.Size == 64); 321 } 322 bool isMem80() const { 323 return Kind == Memory && (!Mem.Size || Mem.Size == 80); 324 } 325 bool isMem128() const { 326 return Kind == Memory && (!Mem.Size || Mem.Size == 128); 327 } 328 bool isMem256() const { 329 return Kind == Memory && (!Mem.Size || Mem.Size == 256); 330 } 331 332 bool isAbsMem() const { 333 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 334 !getMemIndexReg() && getMemScale() == 1; 335 } 336 337 bool isReg() const { return Kind == Register; } 338 339 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 340 // Add as immediates when possible. 341 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 342 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 343 else 344 Inst.addOperand(MCOperand::CreateExpr(Expr)); 345 } 346 347 void addRegOperands(MCInst &Inst, unsigned N) const { 348 assert(N == 1 && "Invalid number of operands!"); 349 Inst.addOperand(MCOperand::CreateReg(getReg())); 350 } 351 352 void addImmOperands(MCInst &Inst, unsigned N) const { 353 assert(N == 1 && "Invalid number of operands!"); 354 addExpr(Inst, getImm()); 355 } 356 357 void addMem8Operands(MCInst &Inst, unsigned N) const { 358 addMemOperands(Inst, N); 359 } 360 void addMem16Operands(MCInst &Inst, unsigned N) const { 361 addMemOperands(Inst, N); 362 } 363 void addMem32Operands(MCInst &Inst, unsigned N) const { 364 addMemOperands(Inst, N); 365 } 366 void addMem64Operands(MCInst &Inst, unsigned N) const { 367 addMemOperands(Inst, N); 368 } 369 void addMem80Operands(MCInst &Inst, unsigned N) const { 370 addMemOperands(Inst, N); 371 } 372 void addMem128Operands(MCInst &Inst, unsigned N) const { 373 addMemOperands(Inst, N); 374 } 375 void addMem256Operands(MCInst &Inst, unsigned N) const { 376 addMemOperands(Inst, N); 377 } 378 379 void addMemOperands(MCInst &Inst, unsigned N) const { 380 assert((N == 5) && "Invalid number of operands!"); 381 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 382 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 383 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 384 addExpr(Inst, getMemDisp()); 385 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 386 } 387 388 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 389 assert((N == 1) && "Invalid number of operands!"); 390 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 391 } 392 393 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 394 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1); 395 X86Operand *Res = new X86Operand(Token, Loc, EndLoc); 396 Res->Tok.Data = Str.data(); 397 Res->Tok.Length = Str.size(); 398 return Res; 399 } 400 401 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { 402 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 403 Res->Reg.RegNo = RegNo; 404 return Res; 405 } 406 407 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ 408 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 409 Res->Imm.Val = Val; 410 return Res; 411 } 412 413 /// Create an absolute memory operand. 414 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, 415 SMLoc EndLoc, unsigned Size = 0) { 416 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 417 Res->Mem.SegReg = 0; 418 Res->Mem.Disp = Disp; 419 Res->Mem.BaseReg = 0; 420 Res->Mem.IndexReg = 0; 421 Res->Mem.Scale = 1; 422 Res->Mem.Size = Size; 423 return Res; 424 } 425 426 /// Create a generalized memory operand. 427 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 428 unsigned BaseReg, unsigned IndexReg, 429 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, 430 unsigned Size = 0) { 431 // We should never just have a displacement, that should be parsed as an 432 // absolute memory operand. 433 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 434 435 // The scale should always be one of {1,2,4,8}. 436 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 437 "Invalid scale!"); 438 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 439 Res->Mem.SegReg = SegReg; 440 Res->Mem.Disp = Disp; 441 Res->Mem.BaseReg = BaseReg; 442 Res->Mem.IndexReg = IndexReg; 443 Res->Mem.Scale = Scale; 444 Res->Mem.Size = Size; 445 return Res; 446 } 447}; 448 449} // end anonymous namespace. 450 451bool X86AsmParser::isSrcOp(X86Operand &Op) { 452 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; 453 454 return (Op.isMem() && 455 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && 456 isa<MCConstantExpr>(Op.Mem.Disp) && 457 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 458 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); 459} 460 461bool X86AsmParser::isDstOp(X86Operand &Op) { 462 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; 463 464 return Op.isMem() && Op.Mem.SegReg == X86::ES && 465 isa<MCConstantExpr>(Op.Mem.Disp) && 466 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 467 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; 468} 469 470bool X86AsmParser::ParseRegister(unsigned &RegNo, 471 SMLoc &StartLoc, SMLoc &EndLoc) { 472 RegNo = 0; 473 bool IntelSyntax = getParser().getAssemblerDialect(); 474 if (!IntelSyntax) { 475 const AsmToken &TokPercent = Parser.getTok(); 476 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); 477 StartLoc = TokPercent.getLoc(); 478 Parser.Lex(); // Eat percent token. 479 } 480 481 const AsmToken &Tok = Parser.getTok(); 482 if (Tok.isNot(AsmToken::Identifier)) { 483 if (IntelSyntax) return true; 484 return Error(StartLoc, "invalid register name", 485 SMRange(StartLoc, Tok.getEndLoc())); 486 } 487 488 RegNo = MatchRegisterName(Tok.getString()); 489 490 // If the match failed, try the register name as lowercase. 491 if (RegNo == 0) 492 RegNo = MatchRegisterName(Tok.getString().lower()); 493 494 if (!is64BitMode()) { 495 // FIXME: This should be done using Requires<In32BitMode> and 496 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 497 // checked. 498 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 499 // REX prefix. 500 if (RegNo == X86::RIZ || 501 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 502 X86II::isX86_64NonExtLowByteReg(RegNo) || 503 X86II::isX86_64ExtendedReg(RegNo)) 504 return Error(StartLoc, "register %" 505 + Tok.getString() + " is only available in 64-bit mode", 506 SMRange(StartLoc, Tok.getEndLoc())); 507 } 508 509 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 510 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 511 RegNo = X86::ST0; 512 EndLoc = Tok.getLoc(); 513 Parser.Lex(); // Eat 'st' 514 515 // Check to see if we have '(4)' after %st. 516 if (getLexer().isNot(AsmToken::LParen)) 517 return false; 518 // Lex the paren. 519 getParser().Lex(); 520 521 const AsmToken &IntTok = Parser.getTok(); 522 if (IntTok.isNot(AsmToken::Integer)) 523 return Error(IntTok.getLoc(), "expected stack index"); 524 switch (IntTok.getIntVal()) { 525 case 0: RegNo = X86::ST0; break; 526 case 1: RegNo = X86::ST1; break; 527 case 2: RegNo = X86::ST2; break; 528 case 3: RegNo = X86::ST3; break; 529 case 4: RegNo = X86::ST4; break; 530 case 5: RegNo = X86::ST5; break; 531 case 6: RegNo = X86::ST6; break; 532 case 7: RegNo = X86::ST7; break; 533 default: return Error(IntTok.getLoc(), "invalid stack index"); 534 } 535 536 if (getParser().Lex().isNot(AsmToken::RParen)) 537 return Error(Parser.getTok().getLoc(), "expected ')'"); 538 539 EndLoc = Tok.getLoc(); 540 Parser.Lex(); // Eat ')' 541 return false; 542 } 543 544 // If this is "db[0-7]", match it as an alias 545 // for dr[0-7]. 546 if (RegNo == 0 && Tok.getString().size() == 3 && 547 Tok.getString().startswith("db")) { 548 switch (Tok.getString()[2]) { 549 case '0': RegNo = X86::DR0; break; 550 case '1': RegNo = X86::DR1; break; 551 case '2': RegNo = X86::DR2; break; 552 case '3': RegNo = X86::DR3; break; 553 case '4': RegNo = X86::DR4; break; 554 case '5': RegNo = X86::DR5; break; 555 case '6': RegNo = X86::DR6; break; 556 case '7': RegNo = X86::DR7; break; 557 } 558 559 if (RegNo != 0) { 560 EndLoc = Tok.getLoc(); 561 Parser.Lex(); // Eat it. 562 return false; 563 } 564 } 565 566 if (RegNo == 0) { 567 if (IntelSyntax) return true; 568 return Error(StartLoc, "invalid register name", 569 SMRange(StartLoc, Tok.getEndLoc())); 570 } 571 572 EndLoc = Tok.getEndLoc(); 573 Parser.Lex(); // Eat identifier token. 574 return false; 575} 576 577X86Operand *X86AsmParser::ParseOperand() { 578 if (getParser().getAssemblerDialect()) 579 return ParseIntelOperand(); 580 return ParseATTOperand(); 581} 582 583/// getIntelMemOperandSize - Return intel memory operand size. 584static unsigned getIntelMemOperandSize(StringRef OpStr) { 585 unsigned Size = 0; 586 if (OpStr == "BYTE") Size = 8; 587 if (OpStr == "WORD") Size = 16; 588 if (OpStr == "DWORD") Size = 32; 589 if (OpStr == "QWORD") Size = 64; 590 if (OpStr == "XWORD") Size = 80; 591 if (OpStr == "XMMWORD") Size = 128; 592 if (OpStr == "YMMWORD") Size = 256; 593 return Size; 594} 595 596X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, 597 unsigned Size) { 598 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 599 SMLoc Start = Parser.getTok().getLoc(), End; 600 601 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 602 // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] 603 604 // Eat '[' 605 if (getLexer().isNot(AsmToken::LBrac)) 606 return ErrorOperand(Start, "Expected '[' token!"); 607 Parser.Lex(); 608 609 if (getLexer().is(AsmToken::Identifier)) { 610 // Parse BaseReg 611 if (ParseRegister(BaseReg, Start, End)) { 612 // Handle '[' 'symbol' ']' 613 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 614 if (getParser().ParseExpression(Disp, End)) return 0; 615 if (getLexer().isNot(AsmToken::RBrac)) 616 return ErrorOperand(Start, "Expected ']' token!"); 617 Parser.Lex(); 618 return X86Operand::CreateMem(Disp, Start, End, Size); 619 } 620 } else if (getLexer().is(AsmToken::Integer)) { 621 int64_t Val = Parser.getTok().getIntVal(); 622 Parser.Lex(); 623 SMLoc Loc = Parser.getTok().getLoc(); 624 if (getLexer().is(AsmToken::RBrac)) { 625 // Handle '[' number ']' 626 Parser.Lex(); 627 return X86Operand::CreateMem(MCConstantExpr::Create(Val, getContext()), 628 Start, End, Size); 629 } else if (getLexer().is(AsmToken::Star)) { 630 // Handle '[' Scale*IndexReg ']' 631 Parser.Lex(); 632 SMLoc IdxRegLoc = Parser.getTok().getLoc(); 633 if (ParseRegister(IndexReg, IdxRegLoc, End)) 634 return ErrorOperand(IdxRegLoc, "Expected register"); 635 Scale = Val; 636 } else 637 return ErrorOperand(Loc, "Unepxeted token"); 638 } 639 640 if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { 641 bool isPlus = getLexer().is(AsmToken::Plus); 642 Parser.Lex(); 643 SMLoc PlusLoc = Parser.getTok().getLoc(); 644 if (getLexer().is(AsmToken::Integer)) { 645 int64_t Val = Parser.getTok().getIntVal(); 646 Parser.Lex(); 647 if (getLexer().is(AsmToken::Star)) { 648 Parser.Lex(); 649 SMLoc IdxRegLoc = Parser.getTok().getLoc(); 650 if (ParseRegister(IndexReg, IdxRegLoc, End)) 651 return ErrorOperand(IdxRegLoc, "Expected register"); 652 Scale = Val; 653 } else if (getLexer().is(AsmToken::RBrac)) { 654 const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); 655 Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); 656 } else 657 return ErrorOperand(PlusLoc, "unexpected token after +"); 658 } else if (getLexer().is(AsmToken::Identifier)) { 659 // This could be an index register or a displacement expression. 660 End = Parser.getTok().getLoc(); 661 if (!IndexReg) 662 ParseRegister(IndexReg, Start, End); 663 else if (getParser().ParseExpression(Disp, End)) return 0; 664 } 665 } 666 667 if (getLexer().isNot(AsmToken::RBrac)) 668 if (getParser().ParseExpression(Disp, End)) return 0; 669 670 End = Parser.getTok().getLoc(); 671 if (getLexer().isNot(AsmToken::RBrac)) 672 return ErrorOperand(End, "expected ']' token!"); 673 Parser.Lex(); 674 End = Parser.getTok().getLoc(); 675 676 // handle [-42] 677 if (!BaseReg && !IndexReg) 678 return X86Operand::CreateMem(Disp, Start, End, Size); 679 680 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 681 Start, End, Size); 682} 683 684/// ParseIntelMemOperand - Parse intel style memory operand. 685X86Operand *X86AsmParser::ParseIntelMemOperand() { 686 const AsmToken &Tok = Parser.getTok(); 687 SMLoc Start = Parser.getTok().getLoc(), End; 688 unsigned SegReg = 0; 689 690 unsigned Size = getIntelMemOperandSize(Tok.getString()); 691 if (Size) { 692 Parser.Lex(); 693 assert (Tok.getString() == "PTR" && "Unexpected token!"); 694 Parser.Lex(); 695 } 696 697 if (getLexer().is(AsmToken::LBrac)) 698 return ParseIntelBracExpression(SegReg, Size); 699 700 if (!ParseRegister(SegReg, Start, End)) { 701 // Handel SegReg : [ ... ] 702 if (getLexer().isNot(AsmToken::Colon)) 703 return ErrorOperand(Start, "Expected ':' token!"); 704 Parser.Lex(); // Eat : 705 if (getLexer().isNot(AsmToken::LBrac)) 706 return ErrorOperand(Start, "Expected '[' token!"); 707 return ParseIntelBracExpression(SegReg, Size); 708 } 709 710 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 711 if (getParser().ParseExpression(Disp, End)) return 0; 712 return X86Operand::CreateMem(Disp, Start, End, Size); 713} 714 715X86Operand *X86AsmParser::ParseIntelOperand() { 716 SMLoc Start = Parser.getTok().getLoc(), End; 717 718 // immediate. 719 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || 720 getLexer().is(AsmToken::Minus)) { 721 const MCExpr *Val; 722 if (!getParser().ParseExpression(Val, End)) { 723 End = Parser.getTok().getLoc(); 724 return X86Operand::CreateImm(Val, Start, End); 725 } 726 } 727 728 // register 729 unsigned RegNo = 0; 730 if (!ParseRegister(RegNo, Start, End)) { 731 End = Parser.getTok().getLoc(); 732 return X86Operand::CreateReg(RegNo, Start, End); 733 } 734 735 // mem operand 736 return ParseIntelMemOperand(); 737} 738 739X86Operand *X86AsmParser::ParseATTOperand() { 740 switch (getLexer().getKind()) { 741 default: 742 // Parse a memory operand with no segment register. 743 return ParseMemOperand(0, Parser.getTok().getLoc()); 744 case AsmToken::Percent: { 745 // Read the register. 746 unsigned RegNo; 747 SMLoc Start, End; 748 if (ParseRegister(RegNo, Start, End)) return 0; 749 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 750 Error(Start, "%eiz and %riz can only be used as index registers", 751 SMRange(Start, End)); 752 return 0; 753 } 754 755 // If this is a segment register followed by a ':', then this is the start 756 // of a memory reference, otherwise this is a normal register reference. 757 if (getLexer().isNot(AsmToken::Colon)) 758 return X86Operand::CreateReg(RegNo, Start, End); 759 760 761 getParser().Lex(); // Eat the colon. 762 return ParseMemOperand(RegNo, Start); 763 } 764 case AsmToken::Dollar: { 765 // $42 -> immediate. 766 SMLoc Start = Parser.getTok().getLoc(), End; 767 Parser.Lex(); 768 const MCExpr *Val; 769 if (getParser().ParseExpression(Val, End)) 770 return 0; 771 return X86Operand::CreateImm(Val, Start, End); 772 } 773 } 774} 775 776/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 777/// has already been parsed if present. 778X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 779 780 // We have to disambiguate a parenthesized expression "(4+5)" from the start 781 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 782 // only way to do this without lookahead is to eat the '(' and see what is 783 // after it. 784 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 785 if (getLexer().isNot(AsmToken::LParen)) { 786 SMLoc ExprEnd; 787 if (getParser().ParseExpression(Disp, ExprEnd)) return 0; 788 789 // After parsing the base expression we could either have a parenthesized 790 // memory address or not. If not, return now. If so, eat the (. 791 if (getLexer().isNot(AsmToken::LParen)) { 792 // Unless we have a segment register, treat this as an immediate. 793 if (SegReg == 0) 794 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 795 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 796 } 797 798 // Eat the '('. 799 Parser.Lex(); 800 } else { 801 // Okay, we have a '('. We don't know if this is an expression or not, but 802 // so we have to eat the ( to see beyond it. 803 SMLoc LParenLoc = Parser.getTok().getLoc(); 804 Parser.Lex(); // Eat the '('. 805 806 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 807 // Nothing to do here, fall into the code below with the '(' part of the 808 // memory operand consumed. 809 } else { 810 SMLoc ExprEnd; 811 812 // It must be an parenthesized expression, parse it now. 813 if (getParser().ParseParenExpression(Disp, ExprEnd)) 814 return 0; 815 816 // After parsing the base expression we could either have a parenthesized 817 // memory address or not. If not, return now. If so, eat the (. 818 if (getLexer().isNot(AsmToken::LParen)) { 819 // Unless we have a segment register, treat this as an immediate. 820 if (SegReg == 0) 821 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 822 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 823 } 824 825 // Eat the '('. 826 Parser.Lex(); 827 } 828 } 829 830 // If we reached here, then we just ate the ( of the memory operand. Process 831 // the rest of the memory operand. 832 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 833 834 if (getLexer().is(AsmToken::Percent)) { 835 SMLoc StartLoc, EndLoc; 836 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0; 837 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 838 Error(StartLoc, "eiz and riz can only be used as index registers", 839 SMRange(StartLoc, EndLoc)); 840 return 0; 841 } 842 } 843 844 if (getLexer().is(AsmToken::Comma)) { 845 Parser.Lex(); // Eat the comma. 846 847 // Following the comma we should have either an index register, or a scale 848 // value. We don't support the later form, but we want to parse it 849 // correctly. 850 // 851 // Not that even though it would be completely consistent to support syntax 852 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 853 if (getLexer().is(AsmToken::Percent)) { 854 SMLoc L; 855 if (ParseRegister(IndexReg, L, L)) return 0; 856 857 if (getLexer().isNot(AsmToken::RParen)) { 858 // Parse the scale amount: 859 // ::= ',' [scale-expression] 860 if (getLexer().isNot(AsmToken::Comma)) { 861 Error(Parser.getTok().getLoc(), 862 "expected comma in scale expression"); 863 return 0; 864 } 865 Parser.Lex(); // Eat the comma. 866 867 if (getLexer().isNot(AsmToken::RParen)) { 868 SMLoc Loc = Parser.getTok().getLoc(); 869 870 int64_t ScaleVal; 871 if (getParser().ParseAbsoluteExpression(ScaleVal)) 872 return 0; 873 874 // Validate the scale amount. 875 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 876 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 877 return 0; 878 } 879 Scale = (unsigned)ScaleVal; 880 } 881 } 882 } else if (getLexer().isNot(AsmToken::RParen)) { 883 // A scale amount without an index is ignored. 884 // index. 885 SMLoc Loc = Parser.getTok().getLoc(); 886 887 int64_t Value; 888 if (getParser().ParseAbsoluteExpression(Value)) 889 return 0; 890 891 if (Value != 1) 892 Warning(Loc, "scale factor without index register is ignored"); 893 Scale = 1; 894 } 895 } 896 897 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 898 if (getLexer().isNot(AsmToken::RParen)) { 899 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 900 return 0; 901 } 902 SMLoc MemEnd = Parser.getTok().getLoc(); 903 Parser.Lex(); // Eat the ')'. 904 905 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 906 MemStart, MemEnd); 907} 908 909bool X86AsmParser:: 910ParseInstruction(StringRef Name, SMLoc NameLoc, 911 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 912 StringRef PatchedName = Name; 913 914 // FIXME: Hack to recognize setneb as setne. 915 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 916 PatchedName != "setb" && PatchedName != "setnb") 917 PatchedName = PatchedName.substr(0, Name.size()-1); 918 919 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 920 const MCExpr *ExtraImmOp = 0; 921 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 922 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 923 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 924 bool IsVCMP = PatchedName.startswith("vcmp"); 925 unsigned SSECCIdx = IsVCMP ? 4 : 3; 926 unsigned SSEComparisonCode = StringSwitch<unsigned>( 927 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 928 .Case("eq", 0) 929 .Case("lt", 1) 930 .Case("le", 2) 931 .Case("unord", 3) 932 .Case("neq", 4) 933 .Case("nlt", 5) 934 .Case("nle", 6) 935 .Case("ord", 7) 936 .Case("eq_uq", 8) 937 .Case("nge", 9) 938 .Case("ngt", 0x0A) 939 .Case("false", 0x0B) 940 .Case("neq_oq", 0x0C) 941 .Case("ge", 0x0D) 942 .Case("gt", 0x0E) 943 .Case("true", 0x0F) 944 .Case("eq_os", 0x10) 945 .Case("lt_oq", 0x11) 946 .Case("le_oq", 0x12) 947 .Case("unord_s", 0x13) 948 .Case("neq_us", 0x14) 949 .Case("nlt_uq", 0x15) 950 .Case("nle_uq", 0x16) 951 .Case("ord_s", 0x17) 952 .Case("eq_us", 0x18) 953 .Case("nge_uq", 0x19) 954 .Case("ngt_uq", 0x1A) 955 .Case("false_os", 0x1B) 956 .Case("neq_os", 0x1C) 957 .Case("ge_oq", 0x1D) 958 .Case("gt_oq", 0x1E) 959 .Case("true_us", 0x1F) 960 .Default(~0U); 961 if (SSEComparisonCode != ~0U) { 962 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 963 getParser().getContext()); 964 if (PatchedName.endswith("ss")) { 965 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 966 } else if (PatchedName.endswith("sd")) { 967 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 968 } else if (PatchedName.endswith("ps")) { 969 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 970 } else { 971 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 972 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 973 } 974 } 975 } 976 977 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 978 979 if (ExtraImmOp) 980 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 981 982 983 // Determine whether this is an instruction prefix. 984 bool isPrefix = 985 Name == "lock" || Name == "rep" || 986 Name == "repe" || Name == "repz" || 987 Name == "repne" || Name == "repnz" || 988 Name == "rex64" || Name == "data16"; 989 990 991 // This does the actual operand parsing. Don't parse any more if we have a 992 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 993 // just want to parse the "lock" as the first instruction and the "incl" as 994 // the next one. 995 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 996 997 // Parse '*' modifier. 998 if (getLexer().is(AsmToken::Star)) { 999 SMLoc Loc = Parser.getTok().getLoc(); 1000 Operands.push_back(X86Operand::CreateToken("*", Loc)); 1001 Parser.Lex(); // Eat the star. 1002 } 1003 1004 // Read the first operand. 1005 if (X86Operand *Op = ParseOperand()) 1006 Operands.push_back(Op); 1007 else { 1008 Parser.EatToEndOfStatement(); 1009 return true; 1010 } 1011 1012 while (getLexer().is(AsmToken::Comma)) { 1013 Parser.Lex(); // Eat the comma. 1014 1015 // Parse and remember the operand. 1016 if (X86Operand *Op = ParseOperand()) 1017 Operands.push_back(Op); 1018 else { 1019 Parser.EatToEndOfStatement(); 1020 return true; 1021 } 1022 } 1023 1024 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1025 SMLoc Loc = getLexer().getLoc(); 1026 Parser.EatToEndOfStatement(); 1027 return Error(Loc, "unexpected token in argument list"); 1028 } 1029 } 1030 1031 if (getLexer().is(AsmToken::EndOfStatement)) 1032 Parser.Lex(); // Consume the EndOfStatement 1033 else if (isPrefix && getLexer().is(AsmToken::Slash)) 1034 Parser.Lex(); // Consume the prefix separator Slash 1035 1036 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> 1037 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 1038 // documented form in various unofficial manuals, so a lot of code uses it. 1039 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 1040 Operands.size() == 3) { 1041 X86Operand &Op = *(X86Operand*)Operands.back(); 1042 if (Op.isMem() && Op.Mem.SegReg == 0 && 1043 isa<MCConstantExpr>(Op.Mem.Disp) && 1044 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1045 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1046 SMLoc Loc = Op.getEndLoc(); 1047 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1048 delete &Op; 1049 } 1050 } 1051 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 1052 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && 1053 Operands.size() == 3) { 1054 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1055 if (Op.isMem() && Op.Mem.SegReg == 0 && 1056 isa<MCConstantExpr>(Op.Mem.Disp) && 1057 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1058 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1059 SMLoc Loc = Op.getEndLoc(); 1060 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1061 delete &Op; 1062 } 1063 } 1064 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" 1065 if (Name.startswith("ins") && Operands.size() == 3 && 1066 (Name == "insb" || Name == "insw" || Name == "insl")) { 1067 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1068 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 1069 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { 1070 Operands.pop_back(); 1071 Operands.pop_back(); 1072 delete &Op; 1073 delete &Op2; 1074 } 1075 } 1076 1077 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]" 1078 if (Name.startswith("outs") && Operands.size() == 3 && 1079 (Name == "outsb" || Name == "outsw" || Name == "outsl")) { 1080 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1081 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 1082 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { 1083 Operands.pop_back(); 1084 Operands.pop_back(); 1085 delete &Op; 1086 delete &Op2; 1087 } 1088 } 1089 1090 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" 1091 if (Name.startswith("movs") && Operands.size() == 3 && 1092 (Name == "movsb" || Name == "movsw" || Name == "movsl" || 1093 (is64BitMode() && Name == "movsq"))) { 1094 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1095 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 1096 if (isSrcOp(Op) && isDstOp(Op2)) { 1097 Operands.pop_back(); 1098 Operands.pop_back(); 1099 delete &Op; 1100 delete &Op2; 1101 } 1102 } 1103 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" 1104 if (Name.startswith("lods") && Operands.size() == 3 && 1105 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 1106 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { 1107 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1108 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); 1109 if (isSrcOp(*Op1) && Op2->isReg()) { 1110 const char *ins; 1111 unsigned reg = Op2->getReg(); 1112 bool isLods = Name == "lods"; 1113 if (reg == X86::AL && (isLods || Name == "lodsb")) 1114 ins = "lodsb"; 1115 else if (reg == X86::AX && (isLods || Name == "lodsw")) 1116 ins = "lodsw"; 1117 else if (reg == X86::EAX && (isLods || Name == "lodsl")) 1118 ins = "lodsl"; 1119 else if (reg == X86::RAX && (isLods || Name == "lodsq")) 1120 ins = "lodsq"; 1121 else 1122 ins = NULL; 1123 if (ins != NULL) { 1124 Operands.pop_back(); 1125 Operands.pop_back(); 1126 delete Op1; 1127 delete Op2; 1128 if (Name != ins) 1129 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); 1130 } 1131 } 1132 } 1133 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" 1134 if (Name.startswith("stos") && Operands.size() == 3 && 1135 (Name == "stos" || Name == "stosb" || Name == "stosw" || 1136 Name == "stosl" || (is64BitMode() && Name == "stosq"))) { 1137 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1138 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); 1139 if (isDstOp(*Op2) && Op1->isReg()) { 1140 const char *ins; 1141 unsigned reg = Op1->getReg(); 1142 bool isStos = Name == "stos"; 1143 if (reg == X86::AL && (isStos || Name == "stosb")) 1144 ins = "stosb"; 1145 else if (reg == X86::AX && (isStos || Name == "stosw")) 1146 ins = "stosw"; 1147 else if (reg == X86::EAX && (isStos || Name == "stosl")) 1148 ins = "stosl"; 1149 else if (reg == X86::RAX && (isStos || Name == "stosq")) 1150 ins = "stosq"; 1151 else 1152 ins = NULL; 1153 if (ins != NULL) { 1154 Operands.pop_back(); 1155 Operands.pop_back(); 1156 delete Op1; 1157 delete Op2; 1158 if (Name != ins) 1159 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); 1160 } 1161 } 1162 } 1163 1164 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 1165 // "shift <op>". 1166 if ((Name.startswith("shr") || Name.startswith("sar") || 1167 Name.startswith("shl") || Name.startswith("sal") || 1168 Name.startswith("rcl") || Name.startswith("rcr") || 1169 Name.startswith("rol") || Name.startswith("ror")) && 1170 Operands.size() == 3) { 1171 if (getParser().getAssemblerDialect()) { 1172 // Intel syntax 1173 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]); 1174 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 1175 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 1176 delete Operands[2]; 1177 Operands.pop_back(); 1178 } 1179 } else { 1180 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1181 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 1182 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 1183 delete Operands[1]; 1184 Operands.erase(Operands.begin() + 1); 1185 } 1186 } 1187 } 1188 1189 // Transforms "int $3" into "int3" as a size optimization. We can't write an 1190 // instalias with an immediate operand yet. 1191 if (Name == "int" && Operands.size() == 2) { 1192 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1193 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 1194 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) { 1195 delete Operands[1]; 1196 Operands.erase(Operands.begin() + 1); 1197 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3"); 1198 } 1199 } 1200 1201 return false; 1202} 1203 1204bool X86AsmParser:: 1205processInstruction(MCInst &Inst, 1206 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { 1207 switch (Inst.getOpcode()) { 1208 default: return false; 1209 case X86::AND16i16: { 1210 if (!Inst.getOperand(0).isImm() || 1211 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1212 return false; 1213 1214 MCInst TmpInst; 1215 TmpInst.setOpcode(X86::AND16ri8); 1216 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1217 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1218 TmpInst.addOperand(Inst.getOperand(0)); 1219 Inst = TmpInst; 1220 return true; 1221 } 1222 case X86::AND32i32: { 1223 if (!Inst.getOperand(0).isImm() || 1224 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1225 return false; 1226 1227 MCInst TmpInst; 1228 TmpInst.setOpcode(X86::AND32ri8); 1229 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1230 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1231 TmpInst.addOperand(Inst.getOperand(0)); 1232 Inst = TmpInst; 1233 return true; 1234 } 1235 case X86::AND64i32: { 1236 if (!Inst.getOperand(0).isImm() || 1237 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1238 return false; 1239 1240 MCInst TmpInst; 1241 TmpInst.setOpcode(X86::AND64ri8); 1242 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1243 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1244 TmpInst.addOperand(Inst.getOperand(0)); 1245 Inst = TmpInst; 1246 return true; 1247 } 1248 case X86::XOR16i16: { 1249 if (!Inst.getOperand(0).isImm() || 1250 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1251 return false; 1252 1253 MCInst TmpInst; 1254 TmpInst.setOpcode(X86::XOR16ri8); 1255 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1256 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1257 TmpInst.addOperand(Inst.getOperand(0)); 1258 Inst = TmpInst; 1259 return true; 1260 } 1261 case X86::XOR32i32: { 1262 if (!Inst.getOperand(0).isImm() || 1263 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1264 return false; 1265 1266 MCInst TmpInst; 1267 TmpInst.setOpcode(X86::XOR32ri8); 1268 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1269 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1270 TmpInst.addOperand(Inst.getOperand(0)); 1271 Inst = TmpInst; 1272 return true; 1273 } 1274 case X86::XOR64i32: { 1275 if (!Inst.getOperand(0).isImm() || 1276 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1277 return false; 1278 1279 MCInst TmpInst; 1280 TmpInst.setOpcode(X86::XOR64ri8); 1281 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1282 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1283 TmpInst.addOperand(Inst.getOperand(0)); 1284 Inst = TmpInst; 1285 return true; 1286 } 1287 case X86::OR16i16: { 1288 if (!Inst.getOperand(0).isImm() || 1289 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1290 return false; 1291 1292 MCInst TmpInst; 1293 TmpInst.setOpcode(X86::OR16ri8); 1294 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1295 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1296 TmpInst.addOperand(Inst.getOperand(0)); 1297 Inst = TmpInst; 1298 return true; 1299 } 1300 case X86::OR32i32: { 1301 if (!Inst.getOperand(0).isImm() || 1302 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1303 return false; 1304 1305 MCInst TmpInst; 1306 TmpInst.setOpcode(X86::OR32ri8); 1307 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1308 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1309 TmpInst.addOperand(Inst.getOperand(0)); 1310 Inst = TmpInst; 1311 return true; 1312 } 1313 case X86::OR64i32: { 1314 if (!Inst.getOperand(0).isImm() || 1315 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1316 return false; 1317 1318 MCInst TmpInst; 1319 TmpInst.setOpcode(X86::OR64ri8); 1320 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1321 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1322 TmpInst.addOperand(Inst.getOperand(0)); 1323 Inst = TmpInst; 1324 return true; 1325 } 1326 case X86::CMP16i16: { 1327 if (!Inst.getOperand(0).isImm() || 1328 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1329 return false; 1330 1331 MCInst TmpInst; 1332 TmpInst.setOpcode(X86::CMP16ri8); 1333 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1334 TmpInst.addOperand(Inst.getOperand(0)); 1335 Inst = TmpInst; 1336 return true; 1337 } 1338 case X86::CMP32i32: { 1339 if (!Inst.getOperand(0).isImm() || 1340 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1341 return false; 1342 1343 MCInst TmpInst; 1344 TmpInst.setOpcode(X86::CMP32ri8); 1345 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1346 TmpInst.addOperand(Inst.getOperand(0)); 1347 Inst = TmpInst; 1348 return true; 1349 } 1350 case X86::CMP64i32: { 1351 if (!Inst.getOperand(0).isImm() || 1352 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1353 return false; 1354 1355 MCInst TmpInst; 1356 TmpInst.setOpcode(X86::CMP64ri8); 1357 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1358 TmpInst.addOperand(Inst.getOperand(0)); 1359 Inst = TmpInst; 1360 return true; 1361 } 1362 case X86::ADD16i16: { 1363 if (!Inst.getOperand(0).isImm() || 1364 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1365 return false; 1366 1367 MCInst TmpInst; 1368 TmpInst.setOpcode(X86::ADD16ri8); 1369 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1370 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1371 TmpInst.addOperand(Inst.getOperand(0)); 1372 Inst = TmpInst; 1373 return true; 1374 } 1375 case X86::ADD32i32: { 1376 if (!Inst.getOperand(0).isImm() || 1377 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1378 return false; 1379 1380 MCInst TmpInst; 1381 TmpInst.setOpcode(X86::ADD32ri8); 1382 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1383 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1384 TmpInst.addOperand(Inst.getOperand(0)); 1385 Inst = TmpInst; 1386 return true; 1387 } 1388 case X86::ADD64i32: { 1389 if (!Inst.getOperand(0).isImm() || 1390 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1391 return false; 1392 1393 MCInst TmpInst; 1394 TmpInst.setOpcode(X86::ADD64ri8); 1395 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1396 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1397 TmpInst.addOperand(Inst.getOperand(0)); 1398 Inst = TmpInst; 1399 return true; 1400 } 1401 case X86::SUB16i16: { 1402 if (!Inst.getOperand(0).isImm() || 1403 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1404 return false; 1405 1406 MCInst TmpInst; 1407 TmpInst.setOpcode(X86::SUB16ri8); 1408 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1409 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1410 TmpInst.addOperand(Inst.getOperand(0)); 1411 Inst = TmpInst; 1412 return true; 1413 } 1414 case X86::SUB32i32: { 1415 if (!Inst.getOperand(0).isImm() || 1416 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1417 return false; 1418 1419 MCInst TmpInst; 1420 TmpInst.setOpcode(X86::SUB32ri8); 1421 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1422 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1423 TmpInst.addOperand(Inst.getOperand(0)); 1424 Inst = TmpInst; 1425 return true; 1426 } 1427 case X86::SUB64i32: { 1428 if (!Inst.getOperand(0).isImm() || 1429 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1430 return false; 1431 1432 MCInst TmpInst; 1433 TmpInst.setOpcode(X86::SUB64ri8); 1434 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1435 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1436 TmpInst.addOperand(Inst.getOperand(0)); 1437 Inst = TmpInst; 1438 return true; 1439 } 1440 } 1441 return false; 1442} 1443 1444bool X86AsmParser:: 1445MatchAndEmitInstruction(SMLoc IDLoc, 1446 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 1447 MCStreamer &Out) { 1448 assert(!Operands.empty() && "Unexpect empty operand list!"); 1449 X86Operand *Op = static_cast<X86Operand*>(Operands[0]); 1450 assert(Op->isToken() && "Leading operand should always be a mnemonic!"); 1451 1452 // First, handle aliases that expand to multiple instructions. 1453 // FIXME: This should be replaced with a real .td file alias mechanism. 1454 // Also, MatchInstructionImpl should do actually *do* the EmitInstruction 1455 // call. 1456 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || 1457 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || 1458 Op->getToken() == "finit" || Op->getToken() == "fsave" || 1459 Op->getToken() == "fstenv" || Op->getToken() == "fclex") { 1460 MCInst Inst; 1461 Inst.setOpcode(X86::WAIT); 1462 Out.EmitInstruction(Inst); 1463 1464 const char *Repl = 1465 StringSwitch<const char*>(Op->getToken()) 1466 .Case("finit", "fninit") 1467 .Case("fsave", "fnsave") 1468 .Case("fstcw", "fnstcw") 1469 .Case("fstcww", "fnstcw") 1470 .Case("fstenv", "fnstenv") 1471 .Case("fstsw", "fnstsw") 1472 .Case("fstsww", "fnstsw") 1473 .Case("fclex", "fnclex") 1474 .Default(0); 1475 assert(Repl && "Unknown wait-prefixed instruction"); 1476 delete Operands[0]; 1477 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 1478 } 1479 1480 bool WasOriginallyInvalidOperand = false; 1481 unsigned OrigErrorInfo; 1482 MCInst Inst; 1483 1484 // First, try a direct match. 1485 switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo, 1486 getParser().getAssemblerDialect())) { 1487 default: break; 1488 case Match_Success: 1489 // Some instructions need post-processing to, for example, tweak which 1490 // encoding is selected. Loop on it while changes happen so the 1491 // individual transformations can chain off each other. 1492 while (processInstruction(Inst, Operands)) 1493 ; 1494 1495 Out.EmitInstruction(Inst); 1496 return false; 1497 case Match_MissingFeature: 1498 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1499 return true; 1500 case Match_ConversionFail: 1501 return Error(IDLoc, "unable to convert operands to instruction"); 1502 case Match_InvalidOperand: 1503 WasOriginallyInvalidOperand = true; 1504 break; 1505 case Match_MnemonicFail: 1506 break; 1507 } 1508 1509 // FIXME: Ideally, we would only attempt suffix matches for things which are 1510 // valid prefixes, and we could just infer the right unambiguous 1511 // type. However, that requires substantially more matcher support than the 1512 // following hack. 1513 1514 // Change the operand to point to a temporary token. 1515 StringRef Base = Op->getToken(); 1516 SmallString<16> Tmp; 1517 Tmp += Base; 1518 Tmp += ' '; 1519 Op->setTokenValue(Tmp.str()); 1520 1521 // If this instruction starts with an 'f', then it is a floating point stack 1522 // instruction. These come in up to three forms for 32-bit, 64-bit, and 1523 // 80-bit floating point, which use the suffixes s,l,t respectively. 1524 // 1525 // Otherwise, we assume that this may be an integer instruction, which comes 1526 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 1527 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 1528 1529 // Check for the various suffix matches. 1530 Tmp[Base.size()] = Suffixes[0]; 1531 unsigned ErrorInfoIgnore; 1532 unsigned Match1, Match2, Match3, Match4; 1533 1534 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1535 Tmp[Base.size()] = Suffixes[1]; 1536 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1537 Tmp[Base.size()] = Suffixes[2]; 1538 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1539 Tmp[Base.size()] = Suffixes[3]; 1540 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); 1541 1542 // Restore the old token. 1543 Op->setTokenValue(Base); 1544 1545 // If exactly one matched, then we treat that as a successful match (and the 1546 // instruction will already have been filled in correctly, since the failing 1547 // matches won't have modified it). 1548 unsigned NumSuccessfulMatches = 1549 (Match1 == Match_Success) + (Match2 == Match_Success) + 1550 (Match3 == Match_Success) + (Match4 == Match_Success); 1551 if (NumSuccessfulMatches == 1) { 1552 Out.EmitInstruction(Inst); 1553 return false; 1554 } 1555 1556 // Otherwise, the match failed, try to produce a decent error message. 1557 1558 // If we had multiple suffix matches, then identify this as an ambiguous 1559 // match. 1560 if (NumSuccessfulMatches > 1) { 1561 char MatchChars[4]; 1562 unsigned NumMatches = 0; 1563 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; 1564 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; 1565 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; 1566 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; 1567 1568 SmallString<126> Msg; 1569 raw_svector_ostream OS(Msg); 1570 OS << "ambiguous instructions require an explicit suffix (could be "; 1571 for (unsigned i = 0; i != NumMatches; ++i) { 1572 if (i != 0) 1573 OS << ", "; 1574 if (i + 1 == NumMatches) 1575 OS << "or "; 1576 OS << "'" << Base << MatchChars[i] << "'"; 1577 } 1578 OS << ")"; 1579 Error(IDLoc, OS.str()); 1580 return true; 1581 } 1582 1583 // Okay, we know that none of the variants matched successfully. 1584 1585 // If all of the instructions reported an invalid mnemonic, then the original 1586 // mnemonic was invalid. 1587 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && 1588 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { 1589 if (!WasOriginallyInvalidOperand) { 1590 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 1591 Op->getLocRange()); 1592 } 1593 1594 // Recover location info for the operand if we know which was the problem. 1595 if (OrigErrorInfo != ~0U) { 1596 if (OrigErrorInfo >= Operands.size()) 1597 return Error(IDLoc, "too few operands for instruction"); 1598 1599 X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; 1600 if (Operand->getStartLoc().isValid()) { 1601 SMRange OperandRange = Operand->getLocRange(); 1602 return Error(Operand->getStartLoc(), "invalid operand for instruction", 1603 OperandRange); 1604 } 1605 } 1606 1607 return Error(IDLoc, "invalid operand for instruction"); 1608 } 1609 1610 // If one instruction matched with a missing feature, report this as a 1611 // missing feature. 1612 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + 1613 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ 1614 Error(IDLoc, "instruction requires a CPU feature not currently enabled"); 1615 return true; 1616 } 1617 1618 // If one instruction matched with an invalid operand, report this as an 1619 // operand failure. 1620 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + 1621 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ 1622 Error(IDLoc, "invalid operand for instruction"); 1623 return true; 1624 } 1625 1626 // If all of these were an outright failure, report it in a useless way. 1627 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); 1628 return true; 1629} 1630 1631 1632bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 1633 StringRef IDVal = DirectiveID.getIdentifier(); 1634 if (IDVal == ".word") 1635 return ParseDirectiveWord(2, DirectiveID.getLoc()); 1636 else if (IDVal.startswith(".code")) 1637 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 1638 return true; 1639} 1640 1641/// ParseDirectiveWord 1642/// ::= .word [ expression (, expression)* ] 1643bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 1644 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1645 for (;;) { 1646 const MCExpr *Value; 1647 if (getParser().ParseExpression(Value)) 1648 return true; 1649 1650 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 1651 1652 if (getLexer().is(AsmToken::EndOfStatement)) 1653 break; 1654 1655 // FIXME: Improve diagnostic. 1656 if (getLexer().isNot(AsmToken::Comma)) 1657 return Error(L, "unexpected token in directive"); 1658 Parser.Lex(); 1659 } 1660 } 1661 1662 Parser.Lex(); 1663 return false; 1664} 1665 1666/// ParseDirectiveCode 1667/// ::= .code32 | .code64 1668bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 1669 if (IDVal == ".code32") { 1670 Parser.Lex(); 1671 if (is64BitMode()) { 1672 SwitchMode(); 1673 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 1674 } 1675 } else if (IDVal == ".code64") { 1676 Parser.Lex(); 1677 if (!is64BitMode()) { 1678 SwitchMode(); 1679 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 1680 } 1681 } else { 1682 return Error(L, "unexpected directive " + IDVal); 1683 } 1684 1685 return false; 1686} 1687 1688 1689extern "C" void LLVMInitializeX86AsmLexer(); 1690 1691// Force static initialization. 1692extern "C" void LLVMInitializeX86AsmParser() { 1693 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); 1694 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); 1695 LLVMInitializeX86AsmLexer(); 1696} 1697 1698#define GET_REGISTER_MATCHER 1699#define GET_MATCHER_IMPLEMENTATION 1700#include "X86GenAsmMatcher.inc" 1701