X86AsmParser.cpp revision 23075746a1418f281bc2a088ea85560bfd833599
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/Target/TargetAsmParser.h" 11#include "X86.h" 12#include "llvm/ADT/SmallVector.h" 13#include "llvm/ADT/Twine.h" 14#include "llvm/MC/MCAsmLexer.h" 15#include "llvm/MC/MCAsmParser.h" 16#include "llvm/MC/MCStreamer.h" 17#include "llvm/MC/MCExpr.h" 18#include "llvm/MC/MCInst.h" 19#include "llvm/MC/MCParsedAsmOperand.h" 20#include "llvm/Support/SourceMgr.h" 21#include "llvm/Target/TargetRegistry.h" 22#include "llvm/Target/TargetAsmParser.h" 23using namespace llvm; 24 25namespace { 26struct X86Operand; 27 28class X86ATTAsmParser : public TargetAsmParser { 29 MCAsmParser &Parser; 30 31private: 32 MCAsmParser &getParser() const { return Parser; } 33 34 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 35 36 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } 37 38 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } 39 40 bool ParseRegister(unsigned &RegNo); 41 42 bool ParseOperand(X86Operand &Op); 43 44 bool ParseMemOperand(X86Operand &Op); 45 46 bool ParseDirectiveWord(unsigned Size, SMLoc L); 47 48 /// @name Auto-generated Match Functions 49 /// { 50 51 bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, 52 MCInst &Inst); 53 54 /// MatchRegisterName - Match the given string to a register name, or 0 if 55 /// there is no match. 56 unsigned MatchRegisterName(const StringRef &Name); 57 58 /// } 59 60public: 61 X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) 62 : TargetAsmParser(T), Parser(_Parser) {} 63 64 virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, 65 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 66 67 virtual bool ParseDirective(AsmToken DirectiveID); 68}; 69 70} // end anonymous namespace 71 72 73namespace { 74 75/// X86Operand - Instances of this class represent a parsed X86 machine 76/// instruction. 77struct X86Operand : public MCParsedAsmOperand { 78 enum { 79 Token, 80 Register, 81 Immediate, 82 Memory 83 } Kind; 84 85 union { 86 struct { 87 const char *Data; 88 unsigned Length; 89 } Tok; 90 91 struct { 92 unsigned RegNo; 93 } Reg; 94 95 struct { 96 const MCExpr *Val; 97 } Imm; 98 99 struct { 100 unsigned SegReg; 101 const MCExpr *Disp; 102 unsigned BaseReg; 103 unsigned IndexReg; 104 unsigned Scale; 105 } Mem; 106 }; 107 108 StringRef getToken() const { 109 assert(Kind == Token && "Invalid access!"); 110 return StringRef(Tok.Data, Tok.Length); 111 } 112 113 unsigned getReg() const { 114 assert(Kind == Register && "Invalid access!"); 115 return Reg.RegNo; 116 } 117 118 const MCExpr *getImm() const { 119 assert(Kind == Immediate && "Invalid access!"); 120 return Imm.Val; 121 } 122 123 const MCExpr *getMemDisp() const { 124 assert(Kind == Memory && "Invalid access!"); 125 return Mem.Disp; 126 } 127 unsigned getMemSegReg() const { 128 assert(Kind == Memory && "Invalid access!"); 129 return Mem.SegReg; 130 } 131 unsigned getMemBaseReg() const { 132 assert(Kind == Memory && "Invalid access!"); 133 return Mem.BaseReg; 134 } 135 unsigned getMemIndexReg() const { 136 assert(Kind == Memory && "Invalid access!"); 137 return Mem.IndexReg; 138 } 139 unsigned getMemScale() const { 140 assert(Kind == Memory && "Invalid access!"); 141 return Mem.Scale; 142 } 143 144 bool isToken() const {return Kind == Token; } 145 146 bool isImm() const { return Kind == Immediate; } 147 148 bool isImmSExt8() const { 149 // Accept immediates which fit in 8 bits when sign extended, and 150 // non-absolute immediates. 151 if (!isImm()) 152 return false; 153 154 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { 155 int64_t Value = CE->getValue(); 156 return Value == (int64_t) (int8_t) Value; 157 } 158 159 return true; 160 } 161 162 bool isMem() const { return Kind == Memory; } 163 164 bool isReg() const { return Kind == Register; } 165 166 void addRegOperands(MCInst &Inst, unsigned N) const { 167 assert(N == 1 && "Invalid number of operands!"); 168 Inst.addOperand(MCOperand::CreateReg(getReg())); 169 } 170 171 void addImmOperands(MCInst &Inst, unsigned N) const { 172 assert(N == 1 && "Invalid number of operands!"); 173 Inst.addOperand(MCOperand::CreateExpr(getImm())); 174 } 175 176 void addImmSExt8Operands(MCInst &Inst, unsigned N) const { 177 // FIXME: Support user customization of the render method. 178 assert(N == 1 && "Invalid number of operands!"); 179 Inst.addOperand(MCOperand::CreateExpr(getImm())); 180 } 181 182 void addMemOperands(MCInst &Inst, unsigned N) const { 183 assert((N == 4 || N == 5) && "Invalid number of operands!"); 184 185 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 186 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 187 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 188 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 189 190 // FIXME: What a hack. 191 if (N == 5) 192 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 193 } 194 195 static X86Operand CreateToken(StringRef Str) { 196 X86Operand Res; 197 Res.Kind = Token; 198 Res.Tok.Data = Str.data(); 199 Res.Tok.Length = Str.size(); 200 return Res; 201 } 202 203 static X86Operand CreateReg(unsigned RegNo) { 204 X86Operand Res; 205 Res.Kind = Register; 206 Res.Reg.RegNo = RegNo; 207 return Res; 208 } 209 210 static X86Operand CreateImm(const MCExpr *Val) { 211 X86Operand Res; 212 Res.Kind = Immediate; 213 Res.Imm.Val = Val; 214 return Res; 215 } 216 217 static X86Operand CreateMem(unsigned SegReg, const MCExpr *Disp, 218 unsigned BaseReg, unsigned IndexReg, 219 unsigned Scale) { 220 // We should never just have a displacement, that would be an immediate. 221 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 222 223 // The scale should always be one of {1,2,4,8}. 224 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 225 "Invalid scale!"); 226 X86Operand Res; 227 Res.Kind = Memory; 228 Res.Mem.SegReg = SegReg; 229 Res.Mem.Disp = Disp; 230 Res.Mem.BaseReg = BaseReg; 231 Res.Mem.IndexReg = IndexReg; 232 Res.Mem.Scale = Scale; 233 return Res; 234 } 235}; 236 237} // end anonymous namespace. 238 239 240bool X86ATTAsmParser::ParseRegister(unsigned &RegNo) { 241 RegNo = 0; 242 const AsmToken &TokPercent = getLexer().getTok(); 243 (void)TokPercent; // Avoid warning when assertions are disabled. 244 assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); 245 getLexer().Lex(); // Eat percent token. 246 247 const AsmToken &Tok = getLexer().getTok(); 248 if (Tok.isNot(AsmToken::Identifier)) 249 return Error(Tok.getLoc(), "invalid register name"); 250 251 // FIXME: Validate register for the current architecture; we have to do 252 // validation later, so maybe there is no need for this here. 253 RegNo = MatchRegisterName(Tok.getString()); 254 if (RegNo == 0) 255 return Error(Tok.getLoc(), "invalid register name"); 256 257 getLexer().Lex(); // Eat identifier token. 258 259 return false; 260} 261 262bool X86ATTAsmParser::ParseOperand(X86Operand &Op) { 263 switch (getLexer().getKind()) { 264 default: 265 return ParseMemOperand(Op); 266 case AsmToken::Percent: { 267 // FIXME: if a segment register, this could either be just the seg reg, or 268 // the start of a memory operand. 269 unsigned RegNo; 270 if (ParseRegister(RegNo)) return true; 271 Op = X86Operand::CreateReg(RegNo); 272 return false; 273 } 274 case AsmToken::Dollar: { 275 // $42 -> immediate. 276 getLexer().Lex(); 277 const MCExpr *Val; 278 if (getParser().ParseExpression(Val)) 279 return true; 280 Op = X86Operand::CreateImm(Val); 281 return false; 282 } 283 } 284} 285 286/// ParseMemOperand: segment: disp(basereg, indexreg, scale) 287bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) { 288 // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. 289 unsigned SegReg = 0; 290 291 // We have to disambiguate a parenthesized expression "(4+5)" from the start 292 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 293 // only way to do this without lookahead is to eat the ( and see what is after 294 // it. 295 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 296 if (getLexer().isNot(AsmToken::LParen)) { 297 if (getParser().ParseExpression(Disp)) return true; 298 299 // After parsing the base expression we could either have a parenthesized 300 // memory address or not. If not, return now. If so, eat the (. 301 if (getLexer().isNot(AsmToken::LParen)) { 302 // Unless we have a segment register, treat this as an immediate. 303 if (SegReg) 304 Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1); 305 else 306 Op = X86Operand::CreateImm(Disp); 307 return false; 308 } 309 310 // Eat the '('. 311 getLexer().Lex(); 312 } else { 313 // Okay, we have a '('. We don't know if this is an expression or not, but 314 // so we have to eat the ( to see beyond it. 315 getLexer().Lex(); // Eat the '('. 316 317 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 318 // Nothing to do here, fall into the code below with the '(' part of the 319 // memory operand consumed. 320 } else { 321 // It must be an parenthesized expression, parse it now. 322 if (getParser().ParseParenExpression(Disp)) 323 return true; 324 325 // After parsing the base expression we could either have a parenthesized 326 // memory address or not. If not, return now. If so, eat the (. 327 if (getLexer().isNot(AsmToken::LParen)) { 328 // Unless we have a segment register, treat this as an immediate. 329 if (SegReg) 330 Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1); 331 else 332 Op = X86Operand::CreateImm(Disp); 333 return false; 334 } 335 336 // Eat the '('. 337 getLexer().Lex(); 338 } 339 } 340 341 // If we reached here, then we just ate the ( of the memory operand. Process 342 // the rest of the memory operand. 343 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 344 345 if (getLexer().is(AsmToken::Percent)) 346 if (ParseRegister(BaseReg)) return true; 347 348 if (getLexer().is(AsmToken::Comma)) { 349 getLexer().Lex(); // Eat the comma. 350 351 // Following the comma we should have either an index register, or a scale 352 // value. We don't support the later form, but we want to parse it 353 // correctly. 354 // 355 // Not that even though it would be completely consistent to support syntax 356 // like "1(%eax,,1)", the assembler doesn't. 357 if (getLexer().is(AsmToken::Percent)) { 358 if (ParseRegister(IndexReg)) return true; 359 360 if (getLexer().isNot(AsmToken::RParen)) { 361 // Parse the scale amount: 362 // ::= ',' [scale-expression] 363 if (getLexer().isNot(AsmToken::Comma)) 364 return true; 365 getLexer().Lex(); // Eat the comma. 366 367 if (getLexer().isNot(AsmToken::RParen)) { 368 SMLoc Loc = getLexer().getTok().getLoc(); 369 370 int64_t ScaleVal; 371 if (getParser().ParseAbsoluteExpression(ScaleVal)) 372 return true; 373 374 // Validate the scale amount. 375 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) 376 return Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 377 Scale = (unsigned)ScaleVal; 378 } 379 } 380 } else if (getLexer().isNot(AsmToken::RParen)) { 381 // Otherwise we have the unsupported form of a scale amount without an 382 // index. 383 SMLoc Loc = getLexer().getTok().getLoc(); 384 385 int64_t Value; 386 if (getParser().ParseAbsoluteExpression(Value)) 387 return true; 388 389 return Error(Loc, "cannot have scale factor without index register"); 390 } 391 } 392 393 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 394 if (getLexer().isNot(AsmToken::RParen)) 395 return Error(getLexer().getTok().getLoc(), 396 "unexpected token in memory operand"); 397 getLexer().Lex(); // Eat the ')'. 398 399 Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale); 400 return false; 401} 402 403bool X86ATTAsmParser:: 404ParseInstruction(const StringRef &Name, SMLoc NameLoc, 405 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 406 407 Operands.push_back(new X86Operand(X86Operand::CreateToken(Name))); 408 409 SMLoc Loc = getLexer().getTok().getLoc(); 410 if (getLexer().isNot(AsmToken::EndOfStatement)) { 411 412 // Parse '*' modifier. 413 if (getLexer().is(AsmToken::Star)) { 414 getLexer().Lex(); // Eat the star. 415 Operands.push_back(new X86Operand(X86Operand::CreateToken("*"))); 416 } 417 418 // Read the first operand. 419 X86Operand Op; 420 if (ParseOperand(Op)) 421 return true; 422 423 Operands.push_back(new X86Operand(Op)); 424 425 while (getLexer().is(AsmToken::Comma)) { 426 getLexer().Lex(); // Eat the comma. 427 428 // Parse and remember the operand. 429 if (ParseOperand(Op)) 430 return true; 431 Operands.push_back(new X86Operand(Op)); 432 } 433 } 434 435 return false; 436} 437 438bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { 439 StringRef IDVal = DirectiveID.getIdentifier(); 440 if (IDVal == ".word") 441 return ParseDirectiveWord(2, DirectiveID.getLoc()); 442 return true; 443} 444 445/// ParseDirectiveWord 446/// ::= .word [ expression (, expression)* ] 447bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 448 if (getLexer().isNot(AsmToken::EndOfStatement)) { 449 for (;;) { 450 const MCExpr *Value; 451 if (getParser().ParseExpression(Value)) 452 return true; 453 454 getParser().getStreamer().EmitValue(Value, Size); 455 456 if (getLexer().is(AsmToken::EndOfStatement)) 457 break; 458 459 // FIXME: Improve diagnostic. 460 if (getLexer().isNot(AsmToken::Comma)) 461 return Error(L, "unexpected token in directive"); 462 getLexer().Lex(); 463 } 464 } 465 466 getLexer().Lex(); 467 return false; 468} 469 470// Force static initialization. 471extern "C" void LLVMInitializeX86AsmParser() { 472 RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); 473 RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); 474} 475 476#include "X86GenAsmMatcher.inc" 477