X86AsmParser.cpp revision 12ce0de4622df7bcc15ba6c8818b98c0b936876a
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/Target/TargetAsmParser.h" 11#include "X86.h" 12#include "llvm/ADT/SmallVector.h" 13#include "llvm/ADT/Twine.h" 14#include "llvm/MC/MCStreamer.h" 15#include "llvm/MC/MCExpr.h" 16#include "llvm/MC/MCInst.h" 17#include "llvm/MC/MCParser/MCAsmLexer.h" 18#include "llvm/MC/MCParser/MCAsmParser.h" 19#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 20#include "llvm/Support/SourceMgr.h" 21#include "llvm/Target/TargetRegistry.h" 22#include "llvm/Target/TargetAsmParser.h" 23using namespace llvm; 24 25namespace { 26struct X86Operand; 27 28class X86ATTAsmParser : public TargetAsmParser { 29 MCAsmParser &Parser; 30 31private: 32 MCAsmParser &getParser() const { return Parser; } 33 34 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 35 36 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 37 38 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 39 40 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 41 42 X86Operand *ParseOperand(); 43 X86Operand *ParseMemOperand(); 44 45 bool ParseDirectiveWord(unsigned Size, SMLoc L); 46 47 /// @name Auto-generated Match Functions 48 /// { 49 50 bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, 51 MCInst &Inst); 52 53 /// } 54 55public: 56 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) 57 : TargetAsmParser(T), Parser(_Parser) {} 58 59 virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, 60 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 61 62 virtual bool ParseDirective(AsmToken DirectiveID); 63}; 64 65} // end anonymous namespace 66 67/// @name Auto-generated Match Functions 68/// { 69 70static unsigned MatchRegisterName(const StringRef &Name); 71 72/// } 73 74namespace { 75 76/// X86Operand - Instances of this class represent a parsed X86 machine 77/// instruction. 78struct X86Operand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Register, 82 Immediate, 83 Memory 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 88 union { 89 struct { 90 const char *Data; 91 unsigned Length; 92 } Tok; 93 94 struct { 95 unsigned RegNo; 96 } Reg; 97 98 struct { 99 const MCExpr *Val; 100 } Imm; 101 102 struct { 103 unsigned SegReg; 104 const MCExpr *Disp; 105 unsigned BaseReg; 106 unsigned IndexReg; 107 unsigned Scale; 108 } Mem; 109 }; 110 111 X86Operand(KindTy K, SMLoc Start, SMLoc End) 112 : Kind(K), StartLoc(Start), EndLoc(End) {} 113 114 /// getStartLoc - Get the location of the first token of this operand. 115 SMLoc getStartLoc() const { return StartLoc; } 116 /// getEndLoc - Get the location of the last token of this operand. 117 SMLoc getEndLoc() const { return EndLoc; } 118 119 StringRef getToken() const { 120 assert(Kind == Token && "Invalid access!"); 121 return StringRef(Tok.Data, Tok.Length); 122 } 123 124 unsigned getReg() const { 125 assert(Kind == Register && "Invalid access!"); 126 return Reg.RegNo; 127 } 128 129 const MCExpr *getImm() const { 130 assert(Kind == Immediate && "Invalid access!"); 131 return Imm.Val; 132 } 133 134 const MCExpr *getMemDisp() const { 135 assert(Kind == Memory && "Invalid access!"); 136 return Mem.Disp; 137 } 138 unsigned getMemSegReg() const { 139 assert(Kind == Memory && "Invalid access!"); 140 return Mem.SegReg; 141 } 142 unsigned getMemBaseReg() const { 143 assert(Kind == Memory && "Invalid access!"); 144 return Mem.BaseReg; 145 } 146 unsigned getMemIndexReg() const { 147 assert(Kind == Memory && "Invalid access!"); 148 return Mem.IndexReg; 149 } 150 unsigned getMemScale() const { 151 assert(Kind == Memory && "Invalid access!"); 152 return Mem.Scale; 153 } 154 155 bool isToken() const {return Kind == Token; } 156 157 bool isImm() const { return Kind == Immediate; } 158 159 bool isImmSExt8() const { 160 // Accept immediates which fit in 8 bits when sign extended, and 161 // non-absolute immediates. 162 if (!isImm()) 163 return false; 164 165 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { 166 int64_t Value = CE->getValue(); 167 return Value == (int64_t) (int8_t) Value; 168 } 169 170 return true; 171 } 172 173 bool isMem() const { return Kind == Memory; } 174 175 bool isAbsMem() const { 176 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 177 !getMemIndexReg() && getMemScale() == 1; 178 } 179 180 bool isNoSegMem() const { 181 return Kind == Memory && !getMemSegReg(); 182 } 183 184 bool isReg() const { return Kind == Register; } 185 186 void addRegOperands(MCInst &Inst, unsigned N) const { 187 assert(N == 1 && "Invalid number of operands!"); 188 Inst.addOperand(MCOperand::CreateReg(getReg())); 189 } 190 191 void addImmOperands(MCInst &Inst, unsigned N) const { 192 assert(N == 1 && "Invalid number of operands!"); 193 Inst.addOperand(MCOperand::CreateExpr(getImm())); 194 } 195 196 void addImmSExt8Operands(MCInst &Inst, unsigned N) const { 197 // FIXME: Support user customization of the render method. 198 assert(N == 1 && "Invalid number of operands!"); 199 Inst.addOperand(MCOperand::CreateExpr(getImm())); 200 } 201 202 void addMemOperands(MCInst &Inst, unsigned N) const { 203 assert((N == 5) && "Invalid number of operands!"); 204 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 205 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 206 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 207 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 208 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 209 } 210 211 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 212 assert((N == 1) && "Invalid number of operands!"); 213 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 214 } 215 216 void addNoSegMemOperands(MCInst &Inst, unsigned N) const { 217 assert((N == 4) && "Invalid number of operands!"); 218 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 219 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 220 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 221 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 222 } 223 224 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 225 X86Operand *Res = new X86Operand(Token, Loc, Loc); 226 Res->Tok.Data = Str.data(); 227 Res->Tok.Length = Str.size(); 228 return Res; 229 } 230 231 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { 232 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 233 Res->Reg.RegNo = RegNo; 234 return Res; 235 } 236 237 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ 238 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 239 Res->Imm.Val = Val; 240 return Res; 241 } 242 243 /// Create an absolute memory operand. 244 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, 245 SMLoc EndLoc) { 246 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 247 Res->Mem.SegReg = 0; 248 Res->Mem.Disp = Disp; 249 Res->Mem.BaseReg = 0; 250 Res->Mem.IndexReg = 0; 251 Res->Mem.Scale = 1; 252 return Res; 253 } 254 255 /// Create a generalized memory operand. 256 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 257 unsigned BaseReg, unsigned IndexReg, 258 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { 259 // We should never just have a displacement, that should be parsed as an 260 // absolute memory operand. 261 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 262 263 // The scale should always be one of {1,2,4,8}. 264 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 265 "Invalid scale!"); 266 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 267 Res->Mem.SegReg = SegReg; 268 Res->Mem.Disp = Disp; 269 Res->Mem.BaseReg = BaseReg; 270 Res->Mem.IndexReg = IndexReg; 271 Res->Mem.Scale = Scale; 272 return Res; 273 } 274}; 275 276} // end anonymous namespace. 277 278 279bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, 280 SMLoc &StartLoc, SMLoc &EndLoc) { 281 RegNo = 0; 282 const AsmToken &TokPercent = Parser.getTok(); 283 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); 284 StartLoc = TokPercent.getLoc(); 285 Parser.Lex(); // Eat percent token. 286 287 const AsmToken &Tok = Parser.getTok(); 288 if (Tok.isNot(AsmToken::Identifier)) 289 return Error(Tok.getLoc(), "invalid register name"); 290 291 // FIXME: Validate register for the current architecture; we have to do 292 // validation later, so maybe there is no need for this here. 293 RegNo = MatchRegisterName(Tok.getString()); 294 if (RegNo == 0) 295 return Error(Tok.getLoc(), "invalid register name"); 296 297 EndLoc = Tok.getLoc(); 298 Parser.Lex(); // Eat identifier token. 299 return false; 300} 301 302X86Operand *X86ATTAsmParser::ParseOperand() { 303 switch (getLexer().getKind()) { 304 default: 305 return ParseMemOperand(); 306 case AsmToken::Percent: { 307 // FIXME: if a segment register, this could either be just the seg reg, or 308 // the start of a memory operand. 309 unsigned RegNo; 310 SMLoc Start, End; 311 if (ParseRegister(RegNo, Start, End)) return 0; 312 return X86Operand::CreateReg(RegNo, Start, End); 313 } 314 case AsmToken::Dollar: { 315 // $42 -> immediate. 316 SMLoc Start = Parser.getTok().getLoc(), End; 317 Parser.Lex(); 318 const MCExpr *Val; 319 if (getParser().ParseExpression(Val, End)) 320 return 0; 321 return X86Operand::CreateImm(Val, Start, End); 322 } 323 } 324} 325 326/// ParseMemOperand: segment: disp(basereg, indexreg, scale) 327X86Operand *X86ATTAsmParser::ParseMemOperand() { 328 SMLoc MemStart = Parser.getTok().getLoc(); 329 330 // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. 331 unsigned SegReg = 0; 332 333 // We have to disambiguate a parenthesized expression "(4+5)" from the start 334 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 335 // only way to do this without lookahead is to eat the '(' and see what is 336 // after it. 337 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 338 if (getLexer().isNot(AsmToken::LParen)) { 339 SMLoc ExprEnd; 340 if (getParser().ParseExpression(Disp, ExprEnd)) return 0; 341 342 // After parsing the base expression we could either have a parenthesized 343 // memory address or not. If not, return now. If so, eat the (. 344 if (getLexer().isNot(AsmToken::LParen)) { 345 // Unless we have a segment register, treat this as an immediate. 346 if (SegReg == 0) 347 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 348 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 349 } 350 351 // Eat the '('. 352 Parser.Lex(); 353 } else { 354 // Okay, we have a '('. We don't know if this is an expression or not, but 355 // so we have to eat the ( to see beyond it. 356 SMLoc LParenLoc = Parser.getTok().getLoc(); 357 Parser.Lex(); // Eat the '('. 358 359 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 360 // Nothing to do here, fall into the code below with the '(' part of the 361 // memory operand consumed. 362 } else { 363 SMLoc ExprEnd; 364 365 // It must be an parenthesized expression, parse it now. 366 if (getParser().ParseParenExpression(Disp, ExprEnd)) 367 return 0; 368 369 // After parsing the base expression we could either have a parenthesized 370 // memory address or not. If not, return now. If so, eat the (. 371 if (getLexer().isNot(AsmToken::LParen)) { 372 // Unless we have a segment register, treat this as an immediate. 373 if (SegReg == 0) 374 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 375 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 376 } 377 378 // Eat the '('. 379 Parser.Lex(); 380 } 381 } 382 383 // If we reached here, then we just ate the ( of the memory operand. Process 384 // the rest of the memory operand. 385 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 386 387 if (getLexer().is(AsmToken::Percent)) { 388 SMLoc L; 389 if (ParseRegister(BaseReg, L, L)) return 0; 390 } 391 392 if (getLexer().is(AsmToken::Comma)) { 393 Parser.Lex(); // Eat the comma. 394 395 // Following the comma we should have either an index register, or a scale 396 // value. We don't support the later form, but we want to parse it 397 // correctly. 398 // 399 // Not that even though it would be completely consistent to support syntax 400 // like "1(%eax,,1)", the assembler doesn't. 401 if (getLexer().is(AsmToken::Percent)) { 402 SMLoc L; 403 if (ParseRegister(IndexReg, L, L)) return 0; 404 405 if (getLexer().isNot(AsmToken::RParen)) { 406 // Parse the scale amount: 407 // ::= ',' [scale-expression] 408 if (getLexer().isNot(AsmToken::Comma)) { 409 Error(Parser.getTok().getLoc(), 410 "expected comma in scale expression"); 411 return 0; 412 } 413 Parser.Lex(); // Eat the comma. 414 415 if (getLexer().isNot(AsmToken::RParen)) { 416 SMLoc Loc = Parser.getTok().getLoc(); 417 418 int64_t ScaleVal; 419 if (getParser().ParseAbsoluteExpression(ScaleVal)) 420 return 0; 421 422 // Validate the scale amount. 423 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 424 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 425 return 0; 426 } 427 Scale = (unsigned)ScaleVal; 428 } 429 } 430 } else if (getLexer().isNot(AsmToken::RParen)) { 431 // Otherwise we have the unsupported form of a scale amount without an 432 // index. 433 SMLoc Loc = Parser.getTok().getLoc(); 434 435 int64_t Value; 436 if (getParser().ParseAbsoluteExpression(Value)) 437 return 0; 438 439 Error(Loc, "cannot have scale factor without index register"); 440 return 0; 441 } 442 } 443 444 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 445 if (getLexer().isNot(AsmToken::RParen)) { 446 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 447 return 0; 448 } 449 SMLoc MemEnd = Parser.getTok().getLoc(); 450 Parser.Lex(); // Eat the ')'. 451 452 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 453 MemStart, MemEnd); 454} 455 456bool X86ATTAsmParser:: 457ParseInstruction(const StringRef &Name, SMLoc NameLoc, 458 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 459 // FIXME: Hack to recognize "sal..." for now. We need a way to represent 460 // alternative syntaxes in the .td file, without requiring instruction 461 // duplication. 462 if (Name.startswith("sal")) { 463 std::string Tmp = "shl" + Name.substr(3).str(); 464 Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc)); 465 } else { 466 // FIXME: This is a hack. We eventually want to add a general pattern 467 // mechanism to be used in the table gen file for these assembly names that 468 // use the same opcodes. Also we should only allow the "alternate names" 469 // for rep and repne with the instructions they can only appear with. 470 StringRef PatchedName = Name; 471 if (Name == "repe" || Name == "repz") 472 PatchedName = "rep"; 473 else if (Name == "repnz") 474 PatchedName = "repne"; 475 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 476 } 477 478 if (getLexer().isNot(AsmToken::EndOfStatement)) { 479 480 // Parse '*' modifier. 481 if (getLexer().is(AsmToken::Star)) { 482 SMLoc Loc = Parser.getTok().getLoc(); 483 Operands.push_back(X86Operand::CreateToken("*", Loc)); 484 Parser.Lex(); // Eat the star. 485 } 486 487 // Read the first operand. 488 if (X86Operand *Op = ParseOperand()) 489 Operands.push_back(Op); 490 else 491 return true; 492 493 while (getLexer().is(AsmToken::Comma)) { 494 Parser.Lex(); // Eat the comma. 495 496 // Parse and remember the operand. 497 if (X86Operand *Op = ParseOperand()) 498 Operands.push_back(Op); 499 else 500 return true; 501 } 502 } 503 504 return false; 505} 506 507bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { 508 StringRef IDVal = DirectiveID.getIdentifier(); 509 if (IDVal == ".word") 510 return ParseDirectiveWord(2, DirectiveID.getLoc()); 511 return true; 512} 513 514/// ParseDirectiveWord 515/// ::= .word [ expression (, expression)* ] 516bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 517 if (getLexer().isNot(AsmToken::EndOfStatement)) { 518 for (;;) { 519 const MCExpr *Value; 520 if (getParser().ParseExpression(Value)) 521 return true; 522 523 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 524 525 if (getLexer().is(AsmToken::EndOfStatement)) 526 break; 527 528 // FIXME: Improve diagnostic. 529 if (getLexer().isNot(AsmToken::Comma)) 530 return Error(L, "unexpected token in directive"); 531 Parser.Lex(); 532 } 533 } 534 535 Parser.Lex(); 536 return false; 537} 538 539extern "C" void LLVMInitializeX86AsmLexer(); 540 541// Force static initialization. 542extern "C" void LLVMInitializeX86AsmParser() { 543 RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); 544 RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); 545 LLVMInitializeX86AsmLexer(); 546} 547 548#include "X86GenAsmMatcher.inc" 549