X86AsmParser.cpp revision 12ce0de4622df7bcc15ba6c8818b98c0b936876a
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Target/TargetAsmParser.h"
11#include "X86.h"
12#include "llvm/ADT/SmallVector.h"
13#include "llvm/ADT/Twine.h"
14#include "llvm/MC/MCStreamer.h"
15#include "llvm/MC/MCExpr.h"
16#include "llvm/MC/MCInst.h"
17#include "llvm/MC/MCParser/MCAsmLexer.h"
18#include "llvm/MC/MCParser/MCAsmParser.h"
19#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
20#include "llvm/Support/SourceMgr.h"
21#include "llvm/Target/TargetRegistry.h"
22#include "llvm/Target/TargetAsmParser.h"
23using namespace llvm;
24
25namespace {
26struct X86Operand;
27
28class X86ATTAsmParser : public TargetAsmParser {
29  MCAsmParser &Parser;
30
31private:
32  MCAsmParser &getParser() const { return Parser; }
33
34  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
35
36  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
37
38  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
39
40  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
41
42  X86Operand *ParseOperand();
43  X86Operand *ParseMemOperand();
44
45  bool ParseDirectiveWord(unsigned Size, SMLoc L);
46
47  /// @name Auto-generated Match Functions
48  /// {
49
50  bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
51                        MCInst &Inst);
52
53  /// }
54
55public:
56  X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
57    : TargetAsmParser(T), Parser(_Parser) {}
58
59  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
60                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
61
62  virtual bool ParseDirective(AsmToken DirectiveID);
63};
64
65} // end anonymous namespace
66
67/// @name Auto-generated Match Functions
68/// {
69
70static unsigned MatchRegisterName(const StringRef &Name);
71
72/// }
73
74namespace {
75
76/// X86Operand - Instances of this class represent a parsed X86 machine
77/// instruction.
78struct X86Operand : public MCParsedAsmOperand {
79  enum KindTy {
80    Token,
81    Register,
82    Immediate,
83    Memory
84  } Kind;
85
86  SMLoc StartLoc, EndLoc;
87
88  union {
89    struct {
90      const char *Data;
91      unsigned Length;
92    } Tok;
93
94    struct {
95      unsigned RegNo;
96    } Reg;
97
98    struct {
99      const MCExpr *Val;
100    } Imm;
101
102    struct {
103      unsigned SegReg;
104      const MCExpr *Disp;
105      unsigned BaseReg;
106      unsigned IndexReg;
107      unsigned Scale;
108    } Mem;
109  };
110
111  X86Operand(KindTy K, SMLoc Start, SMLoc End)
112    : Kind(K), StartLoc(Start), EndLoc(End) {}
113
114  /// getStartLoc - Get the location of the first token of this operand.
115  SMLoc getStartLoc() const { return StartLoc; }
116  /// getEndLoc - Get the location of the last token of this operand.
117  SMLoc getEndLoc() const { return EndLoc; }
118
119  StringRef getToken() const {
120    assert(Kind == Token && "Invalid access!");
121    return StringRef(Tok.Data, Tok.Length);
122  }
123
124  unsigned getReg() const {
125    assert(Kind == Register && "Invalid access!");
126    return Reg.RegNo;
127  }
128
129  const MCExpr *getImm() const {
130    assert(Kind == Immediate && "Invalid access!");
131    return Imm.Val;
132  }
133
134  const MCExpr *getMemDisp() const {
135    assert(Kind == Memory && "Invalid access!");
136    return Mem.Disp;
137  }
138  unsigned getMemSegReg() const {
139    assert(Kind == Memory && "Invalid access!");
140    return Mem.SegReg;
141  }
142  unsigned getMemBaseReg() const {
143    assert(Kind == Memory && "Invalid access!");
144    return Mem.BaseReg;
145  }
146  unsigned getMemIndexReg() const {
147    assert(Kind == Memory && "Invalid access!");
148    return Mem.IndexReg;
149  }
150  unsigned getMemScale() const {
151    assert(Kind == Memory && "Invalid access!");
152    return Mem.Scale;
153  }
154
155  bool isToken() const {return Kind == Token; }
156
157  bool isImm() const { return Kind == Immediate; }
158
159  bool isImmSExt8() const {
160    // Accept immediates which fit in 8 bits when sign extended, and
161    // non-absolute immediates.
162    if (!isImm())
163      return false;
164
165    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
166      int64_t Value = CE->getValue();
167      return Value == (int64_t) (int8_t) Value;
168    }
169
170    return true;
171  }
172
173  bool isMem() const { return Kind == Memory; }
174
175  bool isAbsMem() const {
176    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
177      !getMemIndexReg() && getMemScale() == 1;
178  }
179
180  bool isNoSegMem() const {
181    return Kind == Memory && !getMemSegReg();
182  }
183
184  bool isReg() const { return Kind == Register; }
185
186  void addRegOperands(MCInst &Inst, unsigned N) const {
187    assert(N == 1 && "Invalid number of operands!");
188    Inst.addOperand(MCOperand::CreateReg(getReg()));
189  }
190
191  void addImmOperands(MCInst &Inst, unsigned N) const {
192    assert(N == 1 && "Invalid number of operands!");
193    Inst.addOperand(MCOperand::CreateExpr(getImm()));
194  }
195
196  void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
197    // FIXME: Support user customization of the render method.
198    assert(N == 1 && "Invalid number of operands!");
199    Inst.addOperand(MCOperand::CreateExpr(getImm()));
200  }
201
202  void addMemOperands(MCInst &Inst, unsigned N) const {
203    assert((N == 5) && "Invalid number of operands!");
204    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
205    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
206    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
207    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
208    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
209  }
210
211  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
212    assert((N == 1) && "Invalid number of operands!");
213    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
214  }
215
216  void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
217    assert((N == 4) && "Invalid number of operands!");
218    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
219    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
220    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
221    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
222  }
223
224  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
225    X86Operand *Res = new X86Operand(Token, Loc, Loc);
226    Res->Tok.Data = Str.data();
227    Res->Tok.Length = Str.size();
228    return Res;
229  }
230
231  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
232    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
233    Res->Reg.RegNo = RegNo;
234    return Res;
235  }
236
237  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
238    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
239    Res->Imm.Val = Val;
240    return Res;
241  }
242
243  /// Create an absolute memory operand.
244  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
245                               SMLoc EndLoc) {
246    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
247    Res->Mem.SegReg   = 0;
248    Res->Mem.Disp     = Disp;
249    Res->Mem.BaseReg  = 0;
250    Res->Mem.IndexReg = 0;
251    Res->Mem.Scale    = 1;
252    return Res;
253  }
254
255  /// Create a generalized memory operand.
256  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
257                               unsigned BaseReg, unsigned IndexReg,
258                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
259    // We should never just have a displacement, that should be parsed as an
260    // absolute memory operand.
261    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
262
263    // The scale should always be one of {1,2,4,8}.
264    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
265           "Invalid scale!");
266    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
267    Res->Mem.SegReg   = SegReg;
268    Res->Mem.Disp     = Disp;
269    Res->Mem.BaseReg  = BaseReg;
270    Res->Mem.IndexReg = IndexReg;
271    Res->Mem.Scale    = Scale;
272    return Res;
273  }
274};
275
276} // end anonymous namespace.
277
278
279bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
280                                    SMLoc &StartLoc, SMLoc &EndLoc) {
281  RegNo = 0;
282  const AsmToken &TokPercent = Parser.getTok();
283  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
284  StartLoc = TokPercent.getLoc();
285  Parser.Lex(); // Eat percent token.
286
287  const AsmToken &Tok = Parser.getTok();
288  if (Tok.isNot(AsmToken::Identifier))
289    return Error(Tok.getLoc(), "invalid register name");
290
291  // FIXME: Validate register for the current architecture; we have to do
292  // validation later, so maybe there is no need for this here.
293  RegNo = MatchRegisterName(Tok.getString());
294  if (RegNo == 0)
295    return Error(Tok.getLoc(), "invalid register name");
296
297  EndLoc = Tok.getLoc();
298  Parser.Lex(); // Eat identifier token.
299  return false;
300}
301
302X86Operand *X86ATTAsmParser::ParseOperand() {
303  switch (getLexer().getKind()) {
304  default:
305    return ParseMemOperand();
306  case AsmToken::Percent: {
307    // FIXME: if a segment register, this could either be just the seg reg, or
308    // the start of a memory operand.
309    unsigned RegNo;
310    SMLoc Start, End;
311    if (ParseRegister(RegNo, Start, End)) return 0;
312    return X86Operand::CreateReg(RegNo, Start, End);
313  }
314  case AsmToken::Dollar: {
315    // $42 -> immediate.
316    SMLoc Start = Parser.getTok().getLoc(), End;
317    Parser.Lex();
318    const MCExpr *Val;
319    if (getParser().ParseExpression(Val, End))
320      return 0;
321    return X86Operand::CreateImm(Val, Start, End);
322  }
323  }
324}
325
326/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
327X86Operand *X86ATTAsmParser::ParseMemOperand() {
328  SMLoc MemStart = Parser.getTok().getLoc();
329
330  // FIXME: If SegReg ':'  (e.g. %gs:), eat and remember.
331  unsigned SegReg = 0;
332
333  // We have to disambiguate a parenthesized expression "(4+5)" from the start
334  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
335  // only way to do this without lookahead is to eat the '(' and see what is
336  // after it.
337  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
338  if (getLexer().isNot(AsmToken::LParen)) {
339    SMLoc ExprEnd;
340    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
341
342    // After parsing the base expression we could either have a parenthesized
343    // memory address or not.  If not, return now.  If so, eat the (.
344    if (getLexer().isNot(AsmToken::LParen)) {
345      // Unless we have a segment register, treat this as an immediate.
346      if (SegReg == 0)
347        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
348      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
349    }
350
351    // Eat the '('.
352    Parser.Lex();
353  } else {
354    // Okay, we have a '('.  We don't know if this is an expression or not, but
355    // so we have to eat the ( to see beyond it.
356    SMLoc LParenLoc = Parser.getTok().getLoc();
357    Parser.Lex(); // Eat the '('.
358
359    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
360      // Nothing to do here, fall into the code below with the '(' part of the
361      // memory operand consumed.
362    } else {
363      SMLoc ExprEnd;
364
365      // It must be an parenthesized expression, parse it now.
366      if (getParser().ParseParenExpression(Disp, ExprEnd))
367        return 0;
368
369      // After parsing the base expression we could either have a parenthesized
370      // memory address or not.  If not, return now.  If so, eat the (.
371      if (getLexer().isNot(AsmToken::LParen)) {
372        // Unless we have a segment register, treat this as an immediate.
373        if (SegReg == 0)
374          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
375        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
376      }
377
378      // Eat the '('.
379      Parser.Lex();
380    }
381  }
382
383  // If we reached here, then we just ate the ( of the memory operand.  Process
384  // the rest of the memory operand.
385  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
386
387  if (getLexer().is(AsmToken::Percent)) {
388    SMLoc L;
389    if (ParseRegister(BaseReg, L, L)) return 0;
390  }
391
392  if (getLexer().is(AsmToken::Comma)) {
393    Parser.Lex(); // Eat the comma.
394
395    // Following the comma we should have either an index register, or a scale
396    // value. We don't support the later form, but we want to parse it
397    // correctly.
398    //
399    // Not that even though it would be completely consistent to support syntax
400    // like "1(%eax,,1)", the assembler doesn't.
401    if (getLexer().is(AsmToken::Percent)) {
402      SMLoc L;
403      if (ParseRegister(IndexReg, L, L)) return 0;
404
405      if (getLexer().isNot(AsmToken::RParen)) {
406        // Parse the scale amount:
407        //  ::= ',' [scale-expression]
408        if (getLexer().isNot(AsmToken::Comma)) {
409          Error(Parser.getTok().getLoc(),
410                "expected comma in scale expression");
411          return 0;
412        }
413        Parser.Lex(); // Eat the comma.
414
415        if (getLexer().isNot(AsmToken::RParen)) {
416          SMLoc Loc = Parser.getTok().getLoc();
417
418          int64_t ScaleVal;
419          if (getParser().ParseAbsoluteExpression(ScaleVal))
420            return 0;
421
422          // Validate the scale amount.
423          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
424            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
425            return 0;
426          }
427          Scale = (unsigned)ScaleVal;
428        }
429      }
430    } else if (getLexer().isNot(AsmToken::RParen)) {
431      // Otherwise we have the unsupported form of a scale amount without an
432      // index.
433      SMLoc Loc = Parser.getTok().getLoc();
434
435      int64_t Value;
436      if (getParser().ParseAbsoluteExpression(Value))
437        return 0;
438
439      Error(Loc, "cannot have scale factor without index register");
440      return 0;
441    }
442  }
443
444  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
445  if (getLexer().isNot(AsmToken::RParen)) {
446    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
447    return 0;
448  }
449  SMLoc MemEnd = Parser.getTok().getLoc();
450  Parser.Lex(); // Eat the ')'.
451
452  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
453                               MemStart, MemEnd);
454}
455
456bool X86ATTAsmParser::
457ParseInstruction(const StringRef &Name, SMLoc NameLoc,
458                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
459  // FIXME: Hack to recognize "sal..." for now. We need a way to represent
460  // alternative syntaxes in the .td file, without requiring instruction
461  // duplication.
462  if (Name.startswith("sal")) {
463    std::string Tmp = "shl" + Name.substr(3).str();
464    Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc));
465  } else {
466    // FIXME: This is a hack.  We eventually want to add a general pattern
467    // mechanism to be used in the table gen file for these assembly names that
468    // use the same opcodes.  Also we should only allow the "alternate names"
469    // for rep and repne with the instructions they can only appear with.
470    StringRef PatchedName = Name;
471    if (Name == "repe" || Name == "repz")
472      PatchedName = "rep";
473    else if (Name == "repnz")
474      PatchedName = "repne";
475    Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
476  }
477
478  if (getLexer().isNot(AsmToken::EndOfStatement)) {
479
480    // Parse '*' modifier.
481    if (getLexer().is(AsmToken::Star)) {
482      SMLoc Loc = Parser.getTok().getLoc();
483      Operands.push_back(X86Operand::CreateToken("*", Loc));
484      Parser.Lex(); // Eat the star.
485    }
486
487    // Read the first operand.
488    if (X86Operand *Op = ParseOperand())
489      Operands.push_back(Op);
490    else
491      return true;
492
493    while (getLexer().is(AsmToken::Comma)) {
494      Parser.Lex();  // Eat the comma.
495
496      // Parse and remember the operand.
497      if (X86Operand *Op = ParseOperand())
498        Operands.push_back(Op);
499      else
500        return true;
501    }
502  }
503
504  return false;
505}
506
507bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
508  StringRef IDVal = DirectiveID.getIdentifier();
509  if (IDVal == ".word")
510    return ParseDirectiveWord(2, DirectiveID.getLoc());
511  return true;
512}
513
514/// ParseDirectiveWord
515///  ::= .word [ expression (, expression)* ]
516bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
517  if (getLexer().isNot(AsmToken::EndOfStatement)) {
518    for (;;) {
519      const MCExpr *Value;
520      if (getParser().ParseExpression(Value))
521        return true;
522
523      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
524
525      if (getLexer().is(AsmToken::EndOfStatement))
526        break;
527
528      // FIXME: Improve diagnostic.
529      if (getLexer().isNot(AsmToken::Comma))
530        return Error(L, "unexpected token in directive");
531      Parser.Lex();
532    }
533  }
534
535  Parser.Lex();
536  return false;
537}
538
539extern "C" void LLVMInitializeX86AsmLexer();
540
541// Force static initialization.
542extern "C" void LLVMInitializeX86AsmParser() {
543  RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
544  RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
545  LLVMInitializeX86AsmLexer();
546}
547
548#include "X86GenAsmMatcher.inc"
549