X86AsmParser.cpp revision ef63c9a9b6f79fef91dc144db9d5f217d2b83a95
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Target/TargetAsmParser.h"
11#include "X86.h"
12#include "X86Subtarget.h"
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringSwitch.h"
16#include "llvm/ADT/Twine.h"
17#include "llvm/MC/MCStreamer.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCParser/MCAsmLexer.h"
21#include "llvm/MC/MCParser/MCAsmParser.h"
22#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23#include "llvm/Support/SourceMgr.h"
24#include "llvm/Support/raw_ostream.h"
25#include "llvm/Target/TargetRegistry.h"
26#include "llvm/Target/TargetAsmParser.h"
27using namespace llvm;
28
29namespace {
30struct X86Operand;
31
32class X86ATTAsmParser : public TargetAsmParser {
33  MCAsmParser &Parser;
34  TargetMachine &TM;
35
36protected:
37  unsigned Is64Bit : 1;
38
39private:
40  MCAsmParser &getParser() const { return Parser; }
41
42  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
43
44  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
45
46  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
47
48  X86Operand *ParseOperand();
49  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
50
51  bool ParseDirectiveWord(unsigned Size, SMLoc L);
52
53  bool MatchInstruction(SMLoc IDLoc,
54                        const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
55                        MCInst &Inst);
56
57  /// @name Auto-generated Matcher Functions
58  /// {
59
60#define GET_ASSEMBLER_HEADER
61#include "X86GenAsmMatcher.inc"
62
63  /// }
64
65public:
66  X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
67    : TargetAsmParser(T), Parser(_Parser), TM(TM) {
68
69    // Initialize the set of available features.
70    setAvailableFeatures(ComputeAvailableFeatures(
71                           &TM.getSubtarget<X86Subtarget>()));
72  }
73
74  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
75                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
76
77  virtual bool ParseDirective(AsmToken DirectiveID);
78};
79
80class X86_32ATTAsmParser : public X86ATTAsmParser {
81public:
82  X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
83    : X86ATTAsmParser(T, _Parser, TM) {
84    Is64Bit = false;
85  }
86};
87
88class X86_64ATTAsmParser : public X86ATTAsmParser {
89public:
90  X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
91    : X86ATTAsmParser(T, _Parser, TM) {
92    Is64Bit = true;
93  }
94};
95
96} // end anonymous namespace
97
98/// @name Auto-generated Match Functions
99/// {
100
101static unsigned MatchRegisterName(StringRef Name);
102
103/// }
104
105namespace {
106
107/// X86Operand - Instances of this class represent a parsed X86 machine
108/// instruction.
109struct X86Operand : public MCParsedAsmOperand {
110  enum KindTy {
111    Token,
112    Register,
113    Immediate,
114    Memory
115  } Kind;
116
117  SMLoc StartLoc, EndLoc;
118
119  union {
120    struct {
121      const char *Data;
122      unsigned Length;
123    } Tok;
124
125    struct {
126      unsigned RegNo;
127    } Reg;
128
129    struct {
130      const MCExpr *Val;
131    } Imm;
132
133    struct {
134      unsigned SegReg;
135      const MCExpr *Disp;
136      unsigned BaseReg;
137      unsigned IndexReg;
138      unsigned Scale;
139    } Mem;
140  };
141
142  X86Operand(KindTy K, SMLoc Start, SMLoc End)
143    : Kind(K), StartLoc(Start), EndLoc(End) {}
144
145  /// getStartLoc - Get the location of the first token of this operand.
146  SMLoc getStartLoc() const { return StartLoc; }
147  /// getEndLoc - Get the location of the last token of this operand.
148  SMLoc getEndLoc() const { return EndLoc; }
149
150  virtual void dump(raw_ostream &OS) const {}
151
152  StringRef getToken() const {
153    assert(Kind == Token && "Invalid access!");
154    return StringRef(Tok.Data, Tok.Length);
155  }
156  void setTokenValue(StringRef Value) {
157    assert(Kind == Token && "Invalid access!");
158    Tok.Data = Value.data();
159    Tok.Length = Value.size();
160  }
161
162  unsigned getReg() const {
163    assert(Kind == Register && "Invalid access!");
164    return Reg.RegNo;
165  }
166
167  const MCExpr *getImm() const {
168    assert(Kind == Immediate && "Invalid access!");
169    return Imm.Val;
170  }
171
172  const MCExpr *getMemDisp() const {
173    assert(Kind == Memory && "Invalid access!");
174    return Mem.Disp;
175  }
176  unsigned getMemSegReg() const {
177    assert(Kind == Memory && "Invalid access!");
178    return Mem.SegReg;
179  }
180  unsigned getMemBaseReg() const {
181    assert(Kind == Memory && "Invalid access!");
182    return Mem.BaseReg;
183  }
184  unsigned getMemIndexReg() const {
185    assert(Kind == Memory && "Invalid access!");
186    return Mem.IndexReg;
187  }
188  unsigned getMemScale() const {
189    assert(Kind == Memory && "Invalid access!");
190    return Mem.Scale;
191  }
192
193  bool isToken() const {return Kind == Token; }
194
195  bool isImm() const { return Kind == Immediate; }
196
197  bool isImmSExti16i8() const {
198    if (!isImm())
199      return false;
200
201    // If this isn't a constant expr, just assume it fits and let relaxation
202    // handle it.
203    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
204    if (!CE)
205      return true;
206
207    // Otherwise, check the value is in a range that makes sense for this
208    // extension.
209    uint64_t Value = CE->getValue();
210    return ((                                  Value <= 0x000000000000007FULL)||
211            (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
212            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
213  }
214  bool isImmSExti32i8() const {
215    if (!isImm())
216      return false;
217
218    // If this isn't a constant expr, just assume it fits and let relaxation
219    // handle it.
220    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
221    if (!CE)
222      return true;
223
224    // Otherwise, check the value is in a range that makes sense for this
225    // extension.
226    uint64_t Value = CE->getValue();
227    return ((                                  Value <= 0x000000000000007FULL)||
228            (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
229            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
230  }
231  bool isImmSExti64i8() const {
232    if (!isImm())
233      return false;
234
235    // If this isn't a constant expr, just assume it fits and let relaxation
236    // handle it.
237    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
238    if (!CE)
239      return true;
240
241    // Otherwise, check the value is in a range that makes sense for this
242    // extension.
243    uint64_t Value = CE->getValue();
244    return ((                                  Value <= 0x000000000000007FULL)||
245            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
246  }
247  bool isImmSExti64i32() const {
248    if (!isImm())
249      return false;
250
251    // If this isn't a constant expr, just assume it fits and let relaxation
252    // handle it.
253    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
254    if (!CE)
255      return true;
256
257    // Otherwise, check the value is in a range that makes sense for this
258    // extension.
259    uint64_t Value = CE->getValue();
260    return ((                                  Value <= 0x000000007FFFFFFFULL)||
261            (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
262  }
263
264  bool isMem() const { return Kind == Memory; }
265
266  bool isAbsMem() const {
267    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
268      !getMemIndexReg() && getMemScale() == 1;
269  }
270
271  bool isReg() const { return Kind == Register; }
272
273  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
274    // Add as immediates when possible.
275    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
276      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
277    else
278      Inst.addOperand(MCOperand::CreateExpr(Expr));
279  }
280
281  void addRegOperands(MCInst &Inst, unsigned N) const {
282    assert(N == 1 && "Invalid number of operands!");
283    Inst.addOperand(MCOperand::CreateReg(getReg()));
284  }
285
286  void addImmOperands(MCInst &Inst, unsigned N) const {
287    assert(N == 1 && "Invalid number of operands!");
288    addExpr(Inst, getImm());
289  }
290
291  void addMemOperands(MCInst &Inst, unsigned N) const {
292    assert((N == 5) && "Invalid number of operands!");
293    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
294    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
295    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
296    addExpr(Inst, getMemDisp());
297    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
298  }
299
300  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
301    assert((N == 1) && "Invalid number of operands!");
302    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
303  }
304
305  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
306    X86Operand *Res = new X86Operand(Token, Loc, Loc);
307    Res->Tok.Data = Str.data();
308    Res->Tok.Length = Str.size();
309    return Res;
310  }
311
312  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
313    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
314    Res->Reg.RegNo = RegNo;
315    return Res;
316  }
317
318  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
319    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
320    Res->Imm.Val = Val;
321    return Res;
322  }
323
324  /// Create an absolute memory operand.
325  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
326                               SMLoc EndLoc) {
327    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
328    Res->Mem.SegReg   = 0;
329    Res->Mem.Disp     = Disp;
330    Res->Mem.BaseReg  = 0;
331    Res->Mem.IndexReg = 0;
332    Res->Mem.Scale    = 1;
333    return Res;
334  }
335
336  /// Create a generalized memory operand.
337  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
338                               unsigned BaseReg, unsigned IndexReg,
339                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
340    // We should never just have a displacement, that should be parsed as an
341    // absolute memory operand.
342    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
343
344    // The scale should always be one of {1,2,4,8}.
345    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
346           "Invalid scale!");
347    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
348    Res->Mem.SegReg   = SegReg;
349    Res->Mem.Disp     = Disp;
350    Res->Mem.BaseReg  = BaseReg;
351    Res->Mem.IndexReg = IndexReg;
352    Res->Mem.Scale    = Scale;
353    return Res;
354  }
355};
356
357} // end anonymous namespace.
358
359
360bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
361                                    SMLoc &StartLoc, SMLoc &EndLoc) {
362  RegNo = 0;
363  const AsmToken &TokPercent = Parser.getTok();
364  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
365  StartLoc = TokPercent.getLoc();
366  Parser.Lex(); // Eat percent token.
367
368  const AsmToken &Tok = Parser.getTok();
369  if (Tok.isNot(AsmToken::Identifier))
370    return Error(Tok.getLoc(), "invalid register name");
371
372  // FIXME: Validate register for the current architecture; we have to do
373  // validation later, so maybe there is no need for this here.
374  RegNo = MatchRegisterName(Tok.getString());
375
376  // FIXME: This should be done using Requires<In32BitMode> and
377  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
378  // can be also checked.
379  if (RegNo == X86::RIZ && !Is64Bit)
380    return Error(Tok.getLoc(), "riz register in 64-bit mode only");
381
382  // Parse %st(1) and "%st" as "%st(0)"
383  if (RegNo == 0 && Tok.getString() == "st") {
384    RegNo = X86::ST0;
385    EndLoc = Tok.getLoc();
386    Parser.Lex(); // Eat 'st'
387
388    // Check to see if we have '(4)' after %st.
389    if (getLexer().isNot(AsmToken::LParen))
390      return false;
391    // Lex the paren.
392    getParser().Lex();
393
394    const AsmToken &IntTok = Parser.getTok();
395    if (IntTok.isNot(AsmToken::Integer))
396      return Error(IntTok.getLoc(), "expected stack index");
397    switch (IntTok.getIntVal()) {
398    case 0: RegNo = X86::ST0; break;
399    case 1: RegNo = X86::ST1; break;
400    case 2: RegNo = X86::ST2; break;
401    case 3: RegNo = X86::ST3; break;
402    case 4: RegNo = X86::ST4; break;
403    case 5: RegNo = X86::ST5; break;
404    case 6: RegNo = X86::ST6; break;
405    case 7: RegNo = X86::ST7; break;
406    default: return Error(IntTok.getLoc(), "invalid stack index");
407    }
408
409    if (getParser().Lex().isNot(AsmToken::RParen))
410      return Error(Parser.getTok().getLoc(), "expected ')'");
411
412    EndLoc = Tok.getLoc();
413    Parser.Lex(); // Eat ')'
414    return false;
415  }
416
417  // If this is "db[0-7]", match it as an alias
418  // for dr[0-7].
419  if (RegNo == 0 && Tok.getString().size() == 3 &&
420      Tok.getString().startswith("db")) {
421    switch (Tok.getString()[2]) {
422    case '0': RegNo = X86::DR0; break;
423    case '1': RegNo = X86::DR1; break;
424    case '2': RegNo = X86::DR2; break;
425    case '3': RegNo = X86::DR3; break;
426    case '4': RegNo = X86::DR4; break;
427    case '5': RegNo = X86::DR5; break;
428    case '6': RegNo = X86::DR6; break;
429    case '7': RegNo = X86::DR7; break;
430    }
431
432    if (RegNo != 0) {
433      EndLoc = Tok.getLoc();
434      Parser.Lex(); // Eat it.
435      return false;
436    }
437  }
438
439  if (RegNo == 0)
440    return Error(Tok.getLoc(), "invalid register name");
441
442  EndLoc = Tok.getLoc();
443  Parser.Lex(); // Eat identifier token.
444  return false;
445}
446
447X86Operand *X86ATTAsmParser::ParseOperand() {
448  switch (getLexer().getKind()) {
449  default:
450    // Parse a memory operand with no segment register.
451    return ParseMemOperand(0, Parser.getTok().getLoc());
452  case AsmToken::Percent: {
453    // Read the register.
454    unsigned RegNo;
455    SMLoc Start, End;
456    if (ParseRegister(RegNo, Start, End)) return 0;
457    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
458      Error(Start, "eiz and riz can only be used as index registers");
459      return 0;
460    }
461
462    // If this is a segment register followed by a ':', then this is the start
463    // of a memory reference, otherwise this is a normal register reference.
464    if (getLexer().isNot(AsmToken::Colon))
465      return X86Operand::CreateReg(RegNo, Start, End);
466
467
468    getParser().Lex(); // Eat the colon.
469    return ParseMemOperand(RegNo, Start);
470  }
471  case AsmToken::Dollar: {
472    // $42 -> immediate.
473    SMLoc Start = Parser.getTok().getLoc(), End;
474    Parser.Lex();
475    const MCExpr *Val;
476    if (getParser().ParseExpression(Val, End))
477      return 0;
478    return X86Operand::CreateImm(Val, Start, End);
479  }
480  }
481}
482
483/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
484/// has already been parsed if present.
485X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
486
487  // We have to disambiguate a parenthesized expression "(4+5)" from the start
488  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
489  // only way to do this without lookahead is to eat the '(' and see what is
490  // after it.
491  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
492  if (getLexer().isNot(AsmToken::LParen)) {
493    SMLoc ExprEnd;
494    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
495
496    // After parsing the base expression we could either have a parenthesized
497    // memory address or not.  If not, return now.  If so, eat the (.
498    if (getLexer().isNot(AsmToken::LParen)) {
499      // Unless we have a segment register, treat this as an immediate.
500      if (SegReg == 0)
501        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
502      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
503    }
504
505    // Eat the '('.
506    Parser.Lex();
507  } else {
508    // Okay, we have a '('.  We don't know if this is an expression or not, but
509    // so we have to eat the ( to see beyond it.
510    SMLoc LParenLoc = Parser.getTok().getLoc();
511    Parser.Lex(); // Eat the '('.
512
513    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
514      // Nothing to do here, fall into the code below with the '(' part of the
515      // memory operand consumed.
516    } else {
517      SMLoc ExprEnd;
518
519      // It must be an parenthesized expression, parse it now.
520      if (getParser().ParseParenExpression(Disp, ExprEnd))
521        return 0;
522
523      // After parsing the base expression we could either have a parenthesized
524      // memory address or not.  If not, return now.  If so, eat the (.
525      if (getLexer().isNot(AsmToken::LParen)) {
526        // Unless we have a segment register, treat this as an immediate.
527        if (SegReg == 0)
528          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
529        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
530      }
531
532      // Eat the '('.
533      Parser.Lex();
534    }
535  }
536
537  // If we reached here, then we just ate the ( of the memory operand.  Process
538  // the rest of the memory operand.
539  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
540
541  if (getLexer().is(AsmToken::Percent)) {
542    SMLoc L;
543    if (ParseRegister(BaseReg, L, L)) return 0;
544    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
545      Error(L, "eiz and riz can only be used as index registers");
546      return 0;
547    }
548  }
549
550  if (getLexer().is(AsmToken::Comma)) {
551    Parser.Lex(); // Eat the comma.
552
553    // Following the comma we should have either an index register, or a scale
554    // value. We don't support the later form, but we want to parse it
555    // correctly.
556    //
557    // Not that even though it would be completely consistent to support syntax
558    // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
559    if (getLexer().is(AsmToken::Percent)) {
560      SMLoc L;
561      if (ParseRegister(IndexReg, L, L)) return 0;
562
563      if (getLexer().isNot(AsmToken::RParen)) {
564        // Parse the scale amount:
565        //  ::= ',' [scale-expression]
566        if (getLexer().isNot(AsmToken::Comma)) {
567          Error(Parser.getTok().getLoc(),
568                "expected comma in scale expression");
569          return 0;
570        }
571        Parser.Lex(); // Eat the comma.
572
573        if (getLexer().isNot(AsmToken::RParen)) {
574          SMLoc Loc = Parser.getTok().getLoc();
575
576          int64_t ScaleVal;
577          if (getParser().ParseAbsoluteExpression(ScaleVal))
578            return 0;
579
580          // Validate the scale amount.
581          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
582            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
583            return 0;
584          }
585          Scale = (unsigned)ScaleVal;
586        }
587      }
588    } else if (getLexer().isNot(AsmToken::RParen)) {
589      // A scale amount without an index is ignored.
590      // index.
591      SMLoc Loc = Parser.getTok().getLoc();
592
593      int64_t Value;
594      if (getParser().ParseAbsoluteExpression(Value))
595        return 0;
596
597      if (Value != 1)
598        Warning(Loc, "scale factor without index register is ignored");
599      Scale = 1;
600    }
601  }
602
603  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
604  if (getLexer().isNot(AsmToken::RParen)) {
605    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
606    return 0;
607  }
608  SMLoc MemEnd = Parser.getTok().getLoc();
609  Parser.Lex(); // Eat the ')'.
610
611  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
612                               MemStart, MemEnd);
613}
614
615bool X86ATTAsmParser::
616ParseInstruction(StringRef Name, SMLoc NameLoc,
617                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
618  // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
619  // represent alternative syntaxes in the .td file, without requiring
620  // instruction duplication.
621  StringRef PatchedName = StringSwitch<StringRef>(Name)
622    .Case("sal", "shl")
623    .Case("salb", "shlb")
624    .Case("sall", "shll")
625    .Case("salq", "shlq")
626    .Case("salw", "shlw")
627    .Case("repe", "rep")
628    .Case("repz", "rep")
629    .Case("repnz", "repne")
630    .Case("iret", "iretl")
631    .Case("sysret", "sysretl")
632    .Case("push", Is64Bit ? "pushq" : "pushl")
633    .Case("pop", Is64Bit ? "popq" : "popl")
634    .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
635    .Case("popf",  Is64Bit ? "popfq"  : "popfl")
636    .Case("pushfd", "pushfl")
637    .Case("popfd",  "popfl")
638    .Case("retl", Is64Bit ? "retl" : "ret")
639    .Case("retq", Is64Bit ? "ret" : "retq")
640    .Case("setz", "sete")  .Case("setnz", "setne")
641    .Case("setc", "setb")  .Case("setna", "setbe")
642    .Case("setnae", "setb").Case("setnb", "setae")
643    .Case("setnbe", "seta").Case("setnc", "setae")
644    .Case("setng", "setle").Case("setnge", "setl")
645    .Case("setnl", "setge").Case("setnle", "setg")
646    .Case("setpe", "setp") .Case("setpo", "setnp")
647    .Case("jz", "je")  .Case("jnz", "jne")
648    .Case("jc", "jb")  .Case("jna", "jbe")
649    .Case("jnae", "jb").Case("jnb", "jae")
650    .Case("jnbe", "ja").Case("jnc", "jae")
651    .Case("jng", "jle").Case("jnge", "jl")
652    .Case("jnl", "jge").Case("jnle", "jg")
653    .Case("jpe", "jp") .Case("jpo", "jnp")
654    // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
655    .Case("cmovcw",  "cmovbw") .Case("cmovcl",  "cmovbl")
656    .Case("cmovcq",  "cmovbq") .Case("cmovc",   "cmovb")
657    .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl")
658    .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb")
659    .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
660    .Case("cmovnaq", "cmovbeq").Case("cmovna",  "cmovbe")
661    .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
662    .Case("cmovnbq", "cmovaeq").Case("cmovnb",  "cmovae")
663    .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
664    .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
665    .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
666    .Case("cmovncq", "cmovaeq").Case("cmovnc",  "cmovae")
667    .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
668    .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
669    .Case("cmovnw",  "cmovgew").Case("cmovnl",  "cmovgel")
670    .Case("cmovnq",  "cmovgeq").Case("cmovn",   "cmovge")
671    .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
672    .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
673    .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
674    .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
675    .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
676    .Case("cmovnlq", "cmovgeq").Case("cmovnl",  "cmovge")
677    .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
678    .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
679    .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
680    .Case("cmovnzq", "cmovneq").Case("cmovnz",  "cmovne")
681    .Case("cmovzw",  "cmovew") .Case("cmovzl",  "cmovel")
682    .Case("cmovzq",  "cmoveq") .Case("cmovz",   "cmove")
683    .Case("fwait", "wait")
684    .Case("movzx", "movzb")
685    .Default(Name);
686
687  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
688  const MCExpr *ExtraImmOp = 0;
689  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
690      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
691       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
692    bool IsVCMP = PatchedName.startswith("vcmp");
693    unsigned SSECCIdx = IsVCMP ? 4 : 3;
694    unsigned SSEComparisonCode = StringSwitch<unsigned>(
695      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
696      .Case("eq",          0)
697      .Case("lt",          1)
698      .Case("le",          2)
699      .Case("unord",       3)
700      .Case("neq",         4)
701      .Case("nlt",         5)
702      .Case("nle",         6)
703      .Case("ord",         7)
704      .Case("eq_uq",       8)
705      .Case("nge",         9)
706      .Case("ngt",      0x0A)
707      .Case("false",    0x0B)
708      .Case("neq_oq",   0x0C)
709      .Case("ge",       0x0D)
710      .Case("gt",       0x0E)
711      .Case("true",     0x0F)
712      .Case("eq_os",    0x10)
713      .Case("lt_oq",    0x11)
714      .Case("le_oq",    0x12)
715      .Case("unord_s",  0x13)
716      .Case("neq_us",   0x14)
717      .Case("nlt_uq",   0x15)
718      .Case("nle_uq",   0x16)
719      .Case("ord_s",    0x17)
720      .Case("eq_us",    0x18)
721      .Case("nge_uq",   0x19)
722      .Case("ngt_uq",   0x1A)
723      .Case("false_os", 0x1B)
724      .Case("neq_os",   0x1C)
725      .Case("ge_oq",    0x1D)
726      .Case("gt_oq",    0x1E)
727      .Case("true_us",  0x1F)
728      .Default(~0U);
729    if (SSEComparisonCode != ~0U) {
730      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
731                                          getParser().getContext());
732      if (PatchedName.endswith("ss")) {
733        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
734      } else if (PatchedName.endswith("sd")) {
735        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
736      } else if (PatchedName.endswith("ps")) {
737        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
738      } else {
739        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
740        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
741      }
742    }
743  }
744
745  // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
746  if (PatchedName.startswith("vpclmul")) {
747    unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
748      PatchedName.slice(7, PatchedName.size() - 2))
749      .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
750      .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
751      .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
752      .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
753      .Default(~0U);
754    if (CLMULQuadWordSelect != ~0U) {
755      ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
756                                          getParser().getContext());
757      assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
758      PatchedName = "vpclmulqdq";
759    }
760  }
761
762  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
763
764  if (ExtraImmOp)
765    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
766
767
768  // Determine whether this is an instruction prefix.
769  bool isPrefix =
770    PatchedName == "lock" || PatchedName == "rep" ||
771    PatchedName == "repne";
772
773
774  // This does the actual operand parsing.  Don't parse any more if we have a
775  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
776  // just want to parse the "lock" as the first instruction and the "incl" as
777  // the next one.
778  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
779
780    // Parse '*' modifier.
781    if (getLexer().is(AsmToken::Star)) {
782      SMLoc Loc = Parser.getTok().getLoc();
783      Operands.push_back(X86Operand::CreateToken("*", Loc));
784      Parser.Lex(); // Eat the star.
785    }
786
787    // Read the first operand.
788    if (X86Operand *Op = ParseOperand())
789      Operands.push_back(Op);
790    else {
791      Parser.EatToEndOfStatement();
792      return true;
793    }
794
795    while (getLexer().is(AsmToken::Comma)) {
796      Parser.Lex();  // Eat the comma.
797
798      // Parse and remember the operand.
799      if (X86Operand *Op = ParseOperand())
800        Operands.push_back(Op);
801      else {
802        Parser.EatToEndOfStatement();
803        return true;
804      }
805    }
806
807    if (getLexer().isNot(AsmToken::EndOfStatement)) {
808      Parser.EatToEndOfStatement();
809      return TokError("unexpected token in argument list");
810    }
811  }
812
813  if (getLexer().is(AsmToken::EndOfStatement))
814    Parser.Lex(); // Consume the EndOfStatement
815
816  // FIXME: Hack to handle recognize s{hr,ar,hl} <op>, $1.  Canonicalize to
817  // "shift <op>".
818  if ((Name.startswith("shr") || Name.startswith("sar") ||
819       Name.startswith("shl")) &&
820      Operands.size() == 3) {
821    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
822    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
823        cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
824      delete Operands[1];
825      Operands.erase(Operands.begin() + 1);
826    }
827  }
828
829  // FIXME: Hack to handle recognize "in[bwl] <op>".  Canonicalize it to
830  // "inb <op>, %al".
831  if ((Name == "inb" || Name == "inw" || Name == "inl") &&
832      Operands.size() == 2) {
833    unsigned Reg;
834    if (Name[2] == 'b')
835      Reg = MatchRegisterName("al");
836    else if (Name[2] == 'w')
837      Reg = MatchRegisterName("ax");
838    else
839      Reg = MatchRegisterName("eax");
840    SMLoc Loc = Operands.back()->getEndLoc();
841    Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
842  }
843
844  // FIXME: Hack to handle recognize "out[bwl] <op>".  Canonicalize it to
845  // "outb %al, <op>".
846  if ((Name == "outb" || Name == "outw" || Name == "outl") &&
847      Operands.size() == 2) {
848    unsigned Reg;
849    if (Name[3] == 'b')
850      Reg = MatchRegisterName("al");
851    else if (Name[3] == 'w')
852      Reg = MatchRegisterName("ax");
853    else
854      Reg = MatchRegisterName("eax");
855    SMLoc Loc = Operands.back()->getEndLoc();
856    Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
857    std::swap(Operands[1], Operands[2]);
858  }
859
860  // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
861  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
862      Operands.size() == 3) {
863    X86Operand &Op = *(X86Operand*)Operands.back();
864    if (Op.isMem() && Op.Mem.SegReg == 0 &&
865        isa<MCConstantExpr>(Op.Mem.Disp) &&
866        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
867        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
868      SMLoc Loc = Op.getEndLoc();
869      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
870      delete &Op;
871    }
872  }
873
874  // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
875  // "f{mul*,add*,sub*,div*} $op"
876  if ((Name.startswith("fmul") || Name.startswith("fadd") ||
877       Name.startswith("fsub") || Name.startswith("fdiv")) &&
878      Operands.size() == 3 &&
879      static_cast<X86Operand*>(Operands[2])->isReg() &&
880      static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
881    delete Operands[2];
882    Operands.erase(Operands.begin() + 2);
883  }
884
885  // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
886  // B".
887  if (Name.startswith("imul") && Operands.size() == 3 &&
888      static_cast<X86Operand*>(Operands[1])->isImm() &&
889      static_cast<X86Operand*>(Operands.back())->isReg()) {
890    X86Operand *Op = static_cast<X86Operand*>(Operands.back());
891    Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
892                                             Op->getEndLoc()));
893  }
894
895  // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
896  // effect (both store to a 16-bit mem).  Force to sldtw to avoid ambiguity
897  // errors, since its encoding is the most compact.
898  if (Name == "sldt" && Operands.size() == 2 &&
899      static_cast<X86Operand*>(Operands[1])->isMem()) {
900    delete Operands[0];
901    Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
902  }
903
904  // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
905  // synonyms.  Our tables only have the "<reg>, <mem>" form, so if we see the
906  // other operand order, swap them.
907  if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"||
908      Name == "xchg")
909    if (Operands.size() == 3 &&
910        static_cast<X86Operand*>(Operands[1])->isMem() &&
911        static_cast<X86Operand*>(Operands[2])->isReg()) {
912      std::swap(Operands[1], Operands[2]);
913    }
914
915  // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
916  // synonyms.  Our tables only have the "<mem>, <reg>" form, so if we see the
917  // other operand order, swap them.
918  if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"||
919      Name == "test")
920    if (Operands.size() == 3 &&
921        static_cast<X86Operand*>(Operands[1])->isReg() &&
922        static_cast<X86Operand*>(Operands[2])->isMem()) {
923      std::swap(Operands[1], Operands[2]);
924    }
925
926  return false;
927}
928
929bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
930  StringRef IDVal = DirectiveID.getIdentifier();
931  if (IDVal == ".word")
932    return ParseDirectiveWord(2, DirectiveID.getLoc());
933  return true;
934}
935
936/// ParseDirectiveWord
937///  ::= .word [ expression (, expression)* ]
938bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
939  if (getLexer().isNot(AsmToken::EndOfStatement)) {
940    for (;;) {
941      const MCExpr *Value;
942      if (getParser().ParseExpression(Value))
943        return true;
944
945      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
946
947      if (getLexer().is(AsmToken::EndOfStatement))
948        break;
949
950      // FIXME: Improve diagnostic.
951      if (getLexer().isNot(AsmToken::Comma))
952        return Error(L, "unexpected token in directive");
953      Parser.Lex();
954    }
955  }
956
957  Parser.Lex();
958  return false;
959}
960
961
962bool
963X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
964                                  const SmallVectorImpl<MCParsedAsmOperand*>
965                                    &Operands,
966                                  MCInst &Inst) {
967  assert(!Operands.empty() && "Unexpect empty operand list!");
968
969  bool WasOriginallyInvalidOperand = false;
970  unsigned OrigErrorInfo;
971
972  // First, try a direct match.
973  switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
974  case Match_Success:
975    return false;
976  case Match_MissingFeature:
977    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
978    return true;
979  case Match_InvalidOperand:
980    WasOriginallyInvalidOperand = true;
981    break;
982  case Match_MnemonicFail:
983    break;
984  }
985
986  // FIXME: Ideally, we would only attempt suffix matches for things which are
987  // valid prefixes, and we could just infer the right unambiguous
988  // type. However, that requires substantially more matcher support than the
989  // following hack.
990
991  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
992  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
993
994  // Change the operand to point to a temporary token.
995  StringRef Base = Op->getToken();
996  SmallString<16> Tmp;
997  Tmp += Base;
998  Tmp += ' ';
999  Op->setTokenValue(Tmp.str());
1000
1001  // Check for the various suffix matches.
1002  Tmp[Base.size()] = 'b';
1003  unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
1004  MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
1005  Tmp[Base.size()] = 'w';
1006  MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
1007  Tmp[Base.size()] = 'l';
1008  MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
1009  Tmp[Base.size()] = 'q';
1010  MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
1011
1012  // Restore the old token.
1013  Op->setTokenValue(Base);
1014
1015  // If exactly one matched, then we treat that as a successful match (and the
1016  // instruction will already have been filled in correctly, since the failing
1017  // matches won't have modified it).
1018  unsigned NumSuccessfulMatches =
1019    (MatchB == Match_Success) + (MatchW == Match_Success) +
1020    (MatchL == Match_Success) + (MatchQ == Match_Success);
1021  if (NumSuccessfulMatches == 1)
1022    return false;
1023
1024  // Otherwise, the match failed, try to produce a decent error message.
1025
1026  // If we had multiple suffix matches, then identify this as an ambiguous
1027  // match.
1028  if (NumSuccessfulMatches > 1) {
1029    char MatchChars[4];
1030    unsigned NumMatches = 0;
1031    if (MatchB == Match_Success)
1032      MatchChars[NumMatches++] = 'b';
1033    if (MatchW == Match_Success)
1034      MatchChars[NumMatches++] = 'w';
1035    if (MatchL == Match_Success)
1036      MatchChars[NumMatches++] = 'l';
1037    if (MatchQ == Match_Success)
1038      MatchChars[NumMatches++] = 'q';
1039
1040    SmallString<126> Msg;
1041    raw_svector_ostream OS(Msg);
1042    OS << "ambiguous instructions require an explicit suffix (could be ";
1043    for (unsigned i = 0; i != NumMatches; ++i) {
1044      if (i != 0)
1045        OS << ", ";
1046      if (i + 1 == NumMatches)
1047        OS << "or ";
1048      OS << "'" << Base << MatchChars[i] << "'";
1049    }
1050    OS << ")";
1051    Error(IDLoc, OS.str());
1052    return true;
1053  }
1054
1055  // Okay, we know that none of the variants matched successfully.
1056
1057  // If all of the instructions reported an invalid mnemonic, then the original
1058  // mnemonic was invalid.
1059  if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
1060      (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
1061    if (!WasOriginallyInvalidOperand) {
1062      Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
1063      return true;
1064    }
1065
1066    // Recover location info for the operand if we know which was the problem.
1067    SMLoc ErrorLoc = IDLoc;
1068    if (OrigErrorInfo != ~0U) {
1069      ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
1070      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
1071    }
1072
1073    Error(ErrorLoc, "invalid operand for instruction");
1074    return true;
1075  }
1076
1077  // If one instruction matched with a missing feature, report this as a
1078  // missing feature.
1079  if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
1080      (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
1081    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1082    return true;
1083  }
1084
1085  // If one instruction matched with an invalid operand, report this as an
1086  // operand failure.
1087  if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
1088      (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
1089    Error(IDLoc, "invalid operand for instruction");
1090    return true;
1091  }
1092
1093  // If all of these were an outright failure, report it in a useless way.
1094  // FIXME: We should give nicer diagnostics about the exact failure.
1095  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
1096  return true;
1097}
1098
1099
1100extern "C" void LLVMInitializeX86AsmLexer();
1101
1102// Force static initialization.
1103extern "C" void LLVMInitializeX86AsmParser() {
1104  RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
1105  RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
1106  LLVMInitializeX86AsmLexer();
1107}
1108
1109#define GET_REGISTER_MATCHER
1110#define GET_MATCHER_IMPLEMENTATION
1111#include "X86GenAsmMatcher.inc"
1112