X86AsmParser.cpp revision cf50a5390c09325a7fc41640449205eced4363f6
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Target/TargetAsmParser.h"
11#include "X86.h"
12#include "llvm/ADT/SmallVector.h"
13#include "llvm/ADT/StringSwitch.h"
14#include "llvm/ADT/Twine.h"
15#include "llvm/MC/MCStreamer.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/MC/MCParser/MCAsmLexer.h"
19#include "llvm/MC/MCParser/MCAsmParser.h"
20#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21#include "llvm/Support/SourceMgr.h"
22#include "llvm/Target/TargetRegistry.h"
23#include "llvm/Target/TargetAsmParser.h"
24using namespace llvm;
25
26namespace {
27struct X86Operand;
28
29class X86ATTAsmParser : public TargetAsmParser {
30  MCAsmParser &Parser;
31
32protected:
33  unsigned Is64Bit : 1;
34
35private:
36  MCAsmParser &getParser() const { return Parser; }
37
38  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
39
40  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
41
42  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
43
44  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
45
46  X86Operand *ParseOperand();
47  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
48
49  bool ParseDirectiveWord(unsigned Size, SMLoc L);
50
51  void InstructionCleanup(MCInst &Inst);
52
53  /// @name Auto-generated Match Functions
54  /// {
55
56  bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
57                        MCInst &Inst);
58
59  bool MatchInstructionImpl(
60    const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
61
62  /// }
63
64public:
65  X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
66    : TargetAsmParser(T), Parser(_Parser) {}
67
68  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
69                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
70
71  virtual bool ParseDirective(AsmToken DirectiveID);
72};
73
74class X86_32ATTAsmParser : public X86ATTAsmParser {
75public:
76  X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
77    : X86ATTAsmParser(T, _Parser) {
78    Is64Bit = false;
79  }
80};
81
82class X86_64ATTAsmParser : public X86ATTAsmParser {
83public:
84  X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
85    : X86ATTAsmParser(T, _Parser) {
86    Is64Bit = true;
87  }
88};
89
90} // end anonymous namespace
91
92/// @name Auto-generated Match Functions
93/// {
94
95static unsigned MatchRegisterName(StringRef Name);
96
97/// }
98
99namespace {
100
101/// X86Operand - Instances of this class represent a parsed X86 machine
102/// instruction.
103struct X86Operand : public MCParsedAsmOperand {
104  enum KindTy {
105    Token,
106    Register,
107    Immediate,
108    Memory
109  } Kind;
110
111  SMLoc StartLoc, EndLoc;
112
113  union {
114    struct {
115      const char *Data;
116      unsigned Length;
117    } Tok;
118
119    struct {
120      unsigned RegNo;
121    } Reg;
122
123    struct {
124      const MCExpr *Val;
125    } Imm;
126
127    struct {
128      unsigned SegReg;
129      const MCExpr *Disp;
130      unsigned BaseReg;
131      unsigned IndexReg;
132      unsigned Scale;
133    } Mem;
134  };
135
136  X86Operand(KindTy K, SMLoc Start, SMLoc End)
137    : Kind(K), StartLoc(Start), EndLoc(End) {}
138
139  /// getStartLoc - Get the location of the first token of this operand.
140  SMLoc getStartLoc() const { return StartLoc; }
141  /// getEndLoc - Get the location of the last token of this operand.
142  SMLoc getEndLoc() const { return EndLoc; }
143
144  StringRef getToken() const {
145    assert(Kind == Token && "Invalid access!");
146    return StringRef(Tok.Data, Tok.Length);
147  }
148  void setTokenValue(StringRef Value) {
149    assert(Kind == Token && "Invalid access!");
150    Tok.Data = Value.data();
151    Tok.Length = Value.size();
152  }
153
154  unsigned getReg() const {
155    assert(Kind == Register && "Invalid access!");
156    return Reg.RegNo;
157  }
158
159  const MCExpr *getImm() const {
160    assert(Kind == Immediate && "Invalid access!");
161    return Imm.Val;
162  }
163
164  const MCExpr *getMemDisp() const {
165    assert(Kind == Memory && "Invalid access!");
166    return Mem.Disp;
167  }
168  unsigned getMemSegReg() const {
169    assert(Kind == Memory && "Invalid access!");
170    return Mem.SegReg;
171  }
172  unsigned getMemBaseReg() const {
173    assert(Kind == Memory && "Invalid access!");
174    return Mem.BaseReg;
175  }
176  unsigned getMemIndexReg() const {
177    assert(Kind == Memory && "Invalid access!");
178    return Mem.IndexReg;
179  }
180  unsigned getMemScale() const {
181    assert(Kind == Memory && "Invalid access!");
182    return Mem.Scale;
183  }
184
185  bool isToken() const {return Kind == Token; }
186
187  bool isImm() const { return Kind == Immediate; }
188
189  bool isImmSExti16i8() const {
190    if (!isImm())
191      return false;
192
193    // If this isn't a constant expr, just assume it fits and let relaxation
194    // handle it.
195    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
196    if (!CE)
197      return true;
198
199    // Otherwise, check the value is in a range that makes sense for this
200    // extension.
201    uint64_t Value = CE->getValue();
202    return ((                                  Value <= 0x000000000000007FULL)||
203            (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
204            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
205  }
206  bool isImmSExti32i8() const {
207    if (!isImm())
208      return false;
209
210    // If this isn't a constant expr, just assume it fits and let relaxation
211    // handle it.
212    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
213    if (!CE)
214      return true;
215
216    // Otherwise, check the value is in a range that makes sense for this
217    // extension.
218    uint64_t Value = CE->getValue();
219    return ((                                  Value <= 0x000000000000007FULL)||
220            (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
221            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
222  }
223  bool isImmSExti64i8() const {
224    if (!isImm())
225      return false;
226
227    // If this isn't a constant expr, just assume it fits and let relaxation
228    // handle it.
229    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
230    if (!CE)
231      return true;
232
233    // Otherwise, check the value is in a range that makes sense for this
234    // extension.
235    uint64_t Value = CE->getValue();
236    return ((                                  Value <= 0x000000000000007FULL)||
237            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
238  }
239  bool isImmSExti64i32() const {
240    if (!isImm())
241      return false;
242
243    // If this isn't a constant expr, just assume it fits and let relaxation
244    // handle it.
245    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
246    if (!CE)
247      return true;
248
249    // Otherwise, check the value is in a range that makes sense for this
250    // extension.
251    uint64_t Value = CE->getValue();
252    return ((                                  Value <= 0x000000007FFFFFFFULL)||
253            (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
254  }
255
256  bool isMem() const { return Kind == Memory; }
257
258  bool isAbsMem() const {
259    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
260      !getMemIndexReg() && getMemScale() == 1;
261  }
262
263  bool isNoSegMem() const {
264    return Kind == Memory && !getMemSegReg();
265  }
266
267  bool isReg() const { return Kind == Register; }
268
269  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
270    // Add as immediates when possible.
271    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
272      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
273    else
274      Inst.addOperand(MCOperand::CreateExpr(Expr));
275  }
276
277  void addRegOperands(MCInst &Inst, unsigned N) const {
278    assert(N == 1 && "Invalid number of operands!");
279    Inst.addOperand(MCOperand::CreateReg(getReg()));
280  }
281
282  void addImmOperands(MCInst &Inst, unsigned N) const {
283    assert(N == 1 && "Invalid number of operands!");
284    addExpr(Inst, getImm());
285  }
286
287  void addMemOperands(MCInst &Inst, unsigned N) const {
288    assert((N == 5) && "Invalid number of operands!");
289    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
290    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
291    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
292    addExpr(Inst, getMemDisp());
293    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
294  }
295
296  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
297    assert((N == 1) && "Invalid number of operands!");
298    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
299  }
300
301  void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
302    assert((N == 4) && "Invalid number of operands!");
303    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
304    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
305    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
306    addExpr(Inst, getMemDisp());
307  }
308
309  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
310    X86Operand *Res = new X86Operand(Token, Loc, Loc);
311    Res->Tok.Data = Str.data();
312    Res->Tok.Length = Str.size();
313    return Res;
314  }
315
316  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
317    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
318    Res->Reg.RegNo = RegNo;
319    return Res;
320  }
321
322  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
323    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
324    Res->Imm.Val = Val;
325    return Res;
326  }
327
328  /// Create an absolute memory operand.
329  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
330                               SMLoc EndLoc) {
331    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
332    Res->Mem.SegReg   = 0;
333    Res->Mem.Disp     = Disp;
334    Res->Mem.BaseReg  = 0;
335    Res->Mem.IndexReg = 0;
336    Res->Mem.Scale    = 1;
337    return Res;
338  }
339
340  /// Create a generalized memory operand.
341  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
342                               unsigned BaseReg, unsigned IndexReg,
343                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
344    // We should never just have a displacement, that should be parsed as an
345    // absolute memory operand.
346    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
347
348    // The scale should always be one of {1,2,4,8}.
349    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
350           "Invalid scale!");
351    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
352    Res->Mem.SegReg   = SegReg;
353    Res->Mem.Disp     = Disp;
354    Res->Mem.BaseReg  = BaseReg;
355    Res->Mem.IndexReg = IndexReg;
356    Res->Mem.Scale    = Scale;
357    return Res;
358  }
359};
360
361} // end anonymous namespace.
362
363
364bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
365                                    SMLoc &StartLoc, SMLoc &EndLoc) {
366  RegNo = 0;
367  const AsmToken &TokPercent = Parser.getTok();
368  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
369  StartLoc = TokPercent.getLoc();
370  Parser.Lex(); // Eat percent token.
371
372  const AsmToken &Tok = Parser.getTok();
373  if (Tok.isNot(AsmToken::Identifier))
374    return Error(Tok.getLoc(), "invalid register name");
375
376  // FIXME: Validate register for the current architecture; we have to do
377  // validation later, so maybe there is no need for this here.
378  RegNo = MatchRegisterName(Tok.getString());
379
380  // Parse %st(1) and "%st" as "%st(0)"
381  if (RegNo == 0 && Tok.getString() == "st") {
382    RegNo = X86::ST0;
383    EndLoc = Tok.getLoc();
384    Parser.Lex(); // Eat 'st'
385
386    // Check to see if we have '(4)' after %st.
387    if (getLexer().isNot(AsmToken::LParen))
388      return false;
389    // Lex the paren.
390    getParser().Lex();
391
392    const AsmToken &IntTok = Parser.getTok();
393    if (IntTok.isNot(AsmToken::Integer))
394      return Error(IntTok.getLoc(), "expected stack index");
395    switch (IntTok.getIntVal()) {
396    case 0: RegNo = X86::ST0; break;
397    case 1: RegNo = X86::ST1; break;
398    case 2: RegNo = X86::ST2; break;
399    case 3: RegNo = X86::ST3; break;
400    case 4: RegNo = X86::ST4; break;
401    case 5: RegNo = X86::ST5; break;
402    case 6: RegNo = X86::ST6; break;
403    case 7: RegNo = X86::ST7; break;
404    default: return Error(IntTok.getLoc(), "invalid stack index");
405    }
406
407    if (getParser().Lex().isNot(AsmToken::RParen))
408      return Error(Parser.getTok().getLoc(), "expected ')'");
409
410    EndLoc = Tok.getLoc();
411    Parser.Lex(); // Eat ')'
412    return false;
413  }
414
415  if (RegNo == 0)
416    return Error(Tok.getLoc(), "invalid register name");
417
418  EndLoc = Tok.getLoc();
419  Parser.Lex(); // Eat identifier token.
420  return false;
421}
422
423X86Operand *X86ATTAsmParser::ParseOperand() {
424  switch (getLexer().getKind()) {
425  default:
426    // Parse a memory operand with no segment register.
427    return ParseMemOperand(0, Parser.getTok().getLoc());
428  case AsmToken::Percent: {
429    // Read the register.
430    unsigned RegNo;
431    SMLoc Start, End;
432    if (ParseRegister(RegNo, Start, End)) return 0;
433
434    // If this is a segment register followed by a ':', then this is the start
435    // of a memory reference, otherwise this is a normal register reference.
436    if (getLexer().isNot(AsmToken::Colon))
437      return X86Operand::CreateReg(RegNo, Start, End);
438
439
440    getParser().Lex(); // Eat the colon.
441    return ParseMemOperand(RegNo, Start);
442  }
443  case AsmToken::Dollar: {
444    // $42 -> immediate.
445    SMLoc Start = Parser.getTok().getLoc(), End;
446    Parser.Lex();
447    const MCExpr *Val;
448    if (getParser().ParseExpression(Val, End))
449      return 0;
450    return X86Operand::CreateImm(Val, Start, End);
451  }
452  }
453}
454
455/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
456/// has already been parsed if present.
457X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
458
459  // We have to disambiguate a parenthesized expression "(4+5)" from the start
460  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
461  // only way to do this without lookahead is to eat the '(' and see what is
462  // after it.
463  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
464  if (getLexer().isNot(AsmToken::LParen)) {
465    SMLoc ExprEnd;
466    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
467
468    // After parsing the base expression we could either have a parenthesized
469    // memory address or not.  If not, return now.  If so, eat the (.
470    if (getLexer().isNot(AsmToken::LParen)) {
471      // Unless we have a segment register, treat this as an immediate.
472      if (SegReg == 0)
473        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
474      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
475    }
476
477    // Eat the '('.
478    Parser.Lex();
479  } else {
480    // Okay, we have a '('.  We don't know if this is an expression or not, but
481    // so we have to eat the ( to see beyond it.
482    SMLoc LParenLoc = Parser.getTok().getLoc();
483    Parser.Lex(); // Eat the '('.
484
485    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
486      // Nothing to do here, fall into the code below with the '(' part of the
487      // memory operand consumed.
488    } else {
489      SMLoc ExprEnd;
490
491      // It must be an parenthesized expression, parse it now.
492      if (getParser().ParseParenExpression(Disp, ExprEnd))
493        return 0;
494
495      // After parsing the base expression we could either have a parenthesized
496      // memory address or not.  If not, return now.  If so, eat the (.
497      if (getLexer().isNot(AsmToken::LParen)) {
498        // Unless we have a segment register, treat this as an immediate.
499        if (SegReg == 0)
500          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
501        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
502      }
503
504      // Eat the '('.
505      Parser.Lex();
506    }
507  }
508
509  // If we reached here, then we just ate the ( of the memory operand.  Process
510  // the rest of the memory operand.
511  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
512
513  if (getLexer().is(AsmToken::Percent)) {
514    SMLoc L;
515    if (ParseRegister(BaseReg, L, L)) return 0;
516  }
517
518  if (getLexer().is(AsmToken::Comma)) {
519    Parser.Lex(); // Eat the comma.
520
521    // Following the comma we should have either an index register, or a scale
522    // value. We don't support the later form, but we want to parse it
523    // correctly.
524    //
525    // Not that even though it would be completely consistent to support syntax
526    // like "1(%eax,,1)", the assembler doesn't.
527    if (getLexer().is(AsmToken::Percent)) {
528      SMLoc L;
529      if (ParseRegister(IndexReg, L, L)) return 0;
530
531      if (getLexer().isNot(AsmToken::RParen)) {
532        // Parse the scale amount:
533        //  ::= ',' [scale-expression]
534        if (getLexer().isNot(AsmToken::Comma)) {
535          Error(Parser.getTok().getLoc(),
536                "expected comma in scale expression");
537          return 0;
538        }
539        Parser.Lex(); // Eat the comma.
540
541        if (getLexer().isNot(AsmToken::RParen)) {
542          SMLoc Loc = Parser.getTok().getLoc();
543
544          int64_t ScaleVal;
545          if (getParser().ParseAbsoluteExpression(ScaleVal))
546            return 0;
547
548          // Validate the scale amount.
549          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
550            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
551            return 0;
552          }
553          Scale = (unsigned)ScaleVal;
554        }
555      }
556    } else if (getLexer().isNot(AsmToken::RParen)) {
557      // Otherwise we have the unsupported form of a scale amount without an
558      // index.
559      SMLoc Loc = Parser.getTok().getLoc();
560
561      int64_t Value;
562      if (getParser().ParseAbsoluteExpression(Value))
563        return 0;
564
565      Error(Loc, "cannot have scale factor without index register");
566      return 0;
567    }
568  }
569
570  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
571  if (getLexer().isNot(AsmToken::RParen)) {
572    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
573    return 0;
574  }
575  SMLoc MemEnd = Parser.getTok().getLoc();
576  Parser.Lex(); // Eat the ')'.
577
578  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
579                               MemStart, MemEnd);
580}
581
582bool X86ATTAsmParser::
583ParseInstruction(const StringRef &Name, SMLoc NameLoc,
584                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
585  // The various flavors of pushf and popf use Requires<In32BitMode> and
586  // Requires<In64BitMode>, but the assembler doesn't yet implement that.
587  // For now, just do a manual check to prevent silent misencoding.
588  if (Is64Bit) {
589    if (Name == "popfl")
590      return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
591    else if (Name == "pushfl")
592      return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
593  } else {
594    if (Name == "popfq")
595      return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
596    else if (Name == "pushfq")
597      return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
598  }
599
600  // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
601  // represent alternative syntaxes in the .td file, without requiring
602  // instruction duplication.
603  StringRef PatchedName = StringSwitch<StringRef>(Name)
604    .Case("sal", "shl")
605    .Case("salb", "shlb")
606    .Case("sall", "shll")
607    .Case("salq", "shlq")
608    .Case("salw", "shlw")
609    .Case("repe", "rep")
610    .Case("repz", "rep")
611    .Case("repnz", "repne")
612    .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
613    .Case("popf",  Is64Bit ? "popfq"  : "popfl")
614    .Case("retl", Is64Bit ? "retl" : "ret")
615    .Case("retq", Is64Bit ? "ret" : "retq")
616    .Case("setz", "sete")
617    .Case("setnz", "setne")
618    .Case("jz", "je")
619    .Case("jnz", "jne")
620    .Case("cmovcl", "cmovbl")
621    .Case("cmovcl", "cmovbl")
622    .Case("cmovnal", "cmovbel")
623    .Case("cmovnbl", "cmovael")
624    .Case("cmovnbel", "cmoval")
625    .Case("cmovncl", "cmovael")
626    .Case("cmovngl", "cmovlel")
627    .Case("cmovnl", "cmovgel")
628    .Case("cmovngl", "cmovlel")
629    .Case("cmovngel", "cmovll")
630    .Case("cmovnll", "cmovgel")
631    .Case("cmovnlel", "cmovgl")
632    .Case("cmovnzl", "cmovnel")
633    .Case("cmovzl", "cmovel")
634    .Default(Name);
635
636  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
637  const MCExpr *ExtraImmOp = 0;
638  if (PatchedName.startswith("cmp") &&
639      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
640       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
641    unsigned SSEComparisonCode = StringSwitch<unsigned>(
642      PatchedName.slice(3, PatchedName.size() - 2))
643      .Case("eq", 0)
644      .Case("lt", 1)
645      .Case("le", 2)
646      .Case("unord", 3)
647      .Case("neq", 4)
648      .Case("nlt", 5)
649      .Case("nle", 6)
650      .Case("ord", 7)
651      .Default(~0U);
652    if (SSEComparisonCode != ~0U) {
653      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
654                                          getParser().getContext());
655      if (PatchedName.endswith("ss")) {
656        PatchedName = "cmpss";
657      } else if (PatchedName.endswith("sd")) {
658        PatchedName = "cmpsd";
659      } else if (PatchedName.endswith("ps")) {
660        PatchedName = "cmpps";
661      } else {
662        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
663        PatchedName = "cmppd";
664      }
665    }
666  }
667  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
668
669  if (ExtraImmOp)
670    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
671
672  if (getLexer().isNot(AsmToken::EndOfStatement)) {
673
674    // Parse '*' modifier.
675    if (getLexer().is(AsmToken::Star)) {
676      SMLoc Loc = Parser.getTok().getLoc();
677      Operands.push_back(X86Operand::CreateToken("*", Loc));
678      Parser.Lex(); // Eat the star.
679    }
680
681    // Read the first operand.
682    if (X86Operand *Op = ParseOperand())
683      Operands.push_back(Op);
684    else
685      return true;
686
687    while (getLexer().is(AsmToken::Comma)) {
688      Parser.Lex();  // Eat the comma.
689
690      // Parse and remember the operand.
691      if (X86Operand *Op = ParseOperand())
692        Operands.push_back(Op);
693      else
694        return true;
695    }
696  }
697
698  // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
699  if ((Name.startswith("shr") || Name.startswith("sar") ||
700       Name.startswith("shl")) &&
701      Operands.size() == 3 &&
702      static_cast<X86Operand*>(Operands[1])->isImm() &&
703      isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
704      cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
705    delete Operands[1];
706    Operands.erase(Operands.begin() + 1);
707  }
708
709  // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
710  // "f{mul*,add*,sub*,div*} $op"
711  if ((Name.startswith("fmul") || Name.startswith("fadd") ||
712       Name.startswith("fsub") || Name.startswith("fdiv")) &&
713      Operands.size() == 3 &&
714      static_cast<X86Operand*>(Operands[2])->isReg() &&
715      static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
716    delete Operands[2];
717    Operands.erase(Operands.begin() + 2);
718  }
719
720  return false;
721}
722
723bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
724  StringRef IDVal = DirectiveID.getIdentifier();
725  if (IDVal == ".word")
726    return ParseDirectiveWord(2, DirectiveID.getLoc());
727  return true;
728}
729
730/// ParseDirectiveWord
731///  ::= .word [ expression (, expression)* ]
732bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
733  if (getLexer().isNot(AsmToken::EndOfStatement)) {
734    for (;;) {
735      const MCExpr *Value;
736      if (getParser().ParseExpression(Value))
737        return true;
738
739      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
740
741      if (getLexer().is(AsmToken::EndOfStatement))
742        break;
743
744      // FIXME: Improve diagnostic.
745      if (getLexer().isNot(AsmToken::Comma))
746        return Error(L, "unexpected token in directive");
747      Parser.Lex();
748    }
749  }
750
751  Parser.Lex();
752  return false;
753}
754
755/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
756/// imm operand, to having "rm" or "mr" operands with the offset in the disp
757/// field.
758static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
759                         bool isMR) {
760  MCOperand Disp = Inst.getOperand(0);
761
762  // Start over with an empty instruction.
763  Inst = MCInst();
764  Inst.setOpcode(Opc);
765
766  if (!isMR)
767    Inst.addOperand(MCOperand::CreateReg(RegNo));
768
769  // Add the mem operand.
770  Inst.addOperand(MCOperand::CreateReg(0));  // Segment
771  Inst.addOperand(MCOperand::CreateImm(1));  // Scale
772  Inst.addOperand(MCOperand::CreateReg(0));  // IndexReg
773  Inst.addOperand(Disp);                     // Displacement
774  Inst.addOperand(MCOperand::CreateReg(0));  // BaseReg
775
776  if (isMR)
777    Inst.addOperand(MCOperand::CreateReg(RegNo));
778}
779
780// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
781// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
782// proper mechanism for supporting (ambiguous) feature dependent instructions.
783void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
784  if (!Is64Bit) return;
785
786  switch (Inst.getOpcode()) {
787  case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
788  case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
789  case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
790  case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
791  case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
792  case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
793  case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
794  case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
795
796  // moffset instructions are x86-32 only.
797  case X86::MOV8o8a:   LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
798  case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
799  case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
800  case X86::MOV8ao8:   LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
801  case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
802  case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
803  }
804}
805
806bool
807X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
808                                    &Operands,
809                                  MCInst &Inst) {
810  // First, try a direct match.
811  if (!MatchInstructionImpl(Operands, Inst))
812    return false;
813
814  // Ignore anything which is obviously not a suffix match.
815  if (Operands.size() == 0)
816    return true;
817  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
818  if (!Op->isToken() || Op->getToken().size() > 15)
819    return true;
820
821  // FIXME: Ideally, we would only attempt suffix matches for things which are
822  // valid prefixes, and we could just infer the right unambiguous
823  // type. However, that requires substantially more matcher support than the
824  // following hack.
825
826  // Change the operand to point to a temporary token.
827  char Tmp[16];
828  StringRef Base = Op->getToken();
829  memcpy(Tmp, Base.data(), Base.size());
830  Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
831
832  // Check for the various suffix matches.
833  Tmp[Base.size()] = 'b';
834  bool MatchB = MatchInstructionImpl(Operands, Inst);
835  Tmp[Base.size()] = 'w';
836  bool MatchW = MatchInstructionImpl(Operands, Inst);
837  Tmp[Base.size()] = 'l';
838  bool MatchL = MatchInstructionImpl(Operands, Inst);
839  Tmp[Base.size()] = 'q';
840  bool MatchQ = MatchInstructionImpl(Operands, Inst);
841
842  // Restore the old token.
843  Op->setTokenValue(Base);
844
845  // If exactly one matched, then we treat that as a successful match (and the
846  // instruction will already have been filled in correctly, since the failing
847  // matches won't have modified it).
848  if (MatchB + MatchW + MatchL + MatchQ == 3)
849    return false;
850
851  // Otherwise, the match failed.
852  return true;
853}
854
855
856extern "C" void LLVMInitializeX86AsmLexer();
857
858// Force static initialization.
859extern "C" void LLVMInitializeX86AsmParser() {
860  RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
861  RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
862  LLVMInitializeX86AsmLexer();
863}
864
865#include "X86GenAsmMatcher.inc"
866