X86AsmParser.cpp revision 373c458850a963ab062046529337fe976e1f944d
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Target/TargetAsmParser.h"
11#include "X86.h"
12#include "X86Subtarget.h"
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringSwitch.h"
16#include "llvm/ADT/Twine.h"
17#include "llvm/MC/MCStreamer.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCParser/MCAsmLexer.h"
21#include "llvm/MC/MCParser/MCAsmParser.h"
22#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23#include "llvm/Support/SourceMgr.h"
24#include "llvm/Support/raw_ostream.h"
25#include "llvm/Target/TargetRegistry.h"
26#include "llvm/Target/TargetAsmParser.h"
27using namespace llvm;
28
29namespace {
30struct X86Operand;
31
/// X86ATTAsmParser - Target assembly parser for X86.  It turns the token
/// stream provided by the generic MCAsmParser into X86Operand lists
/// (ParseInstruction) and handles the target-specific directives
/// (ParseDirective).  The 32-bit and 64-bit variants are provided by the
/// X86_32ATTAsmParser and X86_64ATTAsmParser subclasses, which differ only in
/// the value they assign to Is64Bit.
class X86ATTAsmParser : public TargetAsmParser {
  /// The generic assembly parser that owns the lexer and reports diagnostics.
  MCAsmParser &Parser;
  /// The target machine; the constructor queries it for the X86Subtarget.
  TargetMachine &TM;

protected:
  /// Mode flag.  It is NOT initialized by this class's constructor; each
  /// subclass constructor assigns it.
  unsigned Is64Bit : 1;

private:
  MCAsmParser &getParser() const { return Parser; }

  MCAsmLexer &getLexer() const { return Parser.getLexer(); }

  /// Forward a diagnostic at location L to the generic parser and return its
  /// result, so callers can write "return Error(...)".
  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }

  /// Parse a '%'-prefixed register.  Returns true on error; on success RegNo,
  /// StartLoc and EndLoc are filled in.
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);

  /// Parse a single operand.  Returns null on error.
  X86Operand *ParseOperand();
  /// Parse a memory operand whose optional segment prefix (SegReg, 0 if
  /// absent) has already been consumed.  Returns null on error.
  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);

  /// Parse the operands of a ".word"-style directive and emit each value
  /// using Size bytes.  Returns true on error.
  bool ParseDirectiveWord(unsigned Size, SMLoc L);

  /// Match a parsed operand list to an MCInst.  Returns true on error.
  bool MatchInstruction(SMLoc IDLoc,
                        const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                        MCInst &Inst);

  /// @name Auto-generated Matcher Functions
  /// {

  // The generated header is textually included inside the class body, so the
  // declarations it contains become private members of this class.
#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  /// Note that the TM parameter shadows the TM member inside the constructor
  /// body; both refer to the same object after initialization.
  X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
    : TargetAsmParser(T), Parser(_Parser), TM(TM) {

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(
                           &TM.getSubtarget<X86Subtarget>()));
  }

  /// Parse the operands following mnemonic Name into Operands.  Returns true
  /// on error.
  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  /// Handle a target-specific directive.  Only ".word" is recognized; every
  /// other directive yields true.
  virtual bool ParseDirective(AsmToken DirectiveID);
};
79
80class X86_32ATTAsmParser : public X86ATTAsmParser {
81public:
82  X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
83    : X86ATTAsmParser(T, _Parser, TM) {
84    Is64Bit = false;
85  }
86};
87
88class X86_64ATTAsmParser : public X86ATTAsmParser {
89public:
90  X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
91    : X86ATTAsmParser(T, _Parser, TM) {
92    Is64Bit = true;
93  }
94};
95
96} // end anonymous namespace
97
98/// @name Auto-generated Match Functions
99/// {
100
101static unsigned MatchRegisterName(StringRef Name);
102
103/// }
104
105namespace {
106
107/// X86Operand - Instances of this class represent a parsed X86 machine
108/// instruction.
109struct X86Operand : public MCParsedAsmOperand {
110  enum KindTy {
111    Token,
112    Register,
113    Immediate,
114    Memory
115  } Kind;
116
117  SMLoc StartLoc, EndLoc;
118
119  union {
120    struct {
121      const char *Data;
122      unsigned Length;
123    } Tok;
124
125    struct {
126      unsigned RegNo;
127    } Reg;
128
129    struct {
130      const MCExpr *Val;
131    } Imm;
132
133    struct {
134      unsigned SegReg;
135      const MCExpr *Disp;
136      unsigned BaseReg;
137      unsigned IndexReg;
138      unsigned Scale;
139    } Mem;
140  };
141
142  X86Operand(KindTy K, SMLoc Start, SMLoc End)
143    : Kind(K), StartLoc(Start), EndLoc(End) {}
144
145  /// getStartLoc - Get the location of the first token of this operand.
146  SMLoc getStartLoc() const { return StartLoc; }
147  /// getEndLoc - Get the location of the last token of this operand.
148  SMLoc getEndLoc() const { return EndLoc; }
149
150  virtual void dump(raw_ostream &OS) const {}
151
152  StringRef getToken() const {
153    assert(Kind == Token && "Invalid access!");
154    return StringRef(Tok.Data, Tok.Length);
155  }
156  void setTokenValue(StringRef Value) {
157    assert(Kind == Token && "Invalid access!");
158    Tok.Data = Value.data();
159    Tok.Length = Value.size();
160  }
161
162  unsigned getReg() const {
163    assert(Kind == Register && "Invalid access!");
164    return Reg.RegNo;
165  }
166
167  const MCExpr *getImm() const {
168    assert(Kind == Immediate && "Invalid access!");
169    return Imm.Val;
170  }
171
172  const MCExpr *getMemDisp() const {
173    assert(Kind == Memory && "Invalid access!");
174    return Mem.Disp;
175  }
176  unsigned getMemSegReg() const {
177    assert(Kind == Memory && "Invalid access!");
178    return Mem.SegReg;
179  }
180  unsigned getMemBaseReg() const {
181    assert(Kind == Memory && "Invalid access!");
182    return Mem.BaseReg;
183  }
184  unsigned getMemIndexReg() const {
185    assert(Kind == Memory && "Invalid access!");
186    return Mem.IndexReg;
187  }
188  unsigned getMemScale() const {
189    assert(Kind == Memory && "Invalid access!");
190    return Mem.Scale;
191  }
192
193  bool isToken() const {return Kind == Token; }
194
195  bool isImm() const { return Kind == Immediate; }
196
197  bool isImmSExti16i8() const {
198    if (!isImm())
199      return false;
200
201    // If this isn't a constant expr, just assume it fits and let relaxation
202    // handle it.
203    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
204    if (!CE)
205      return true;
206
207    // Otherwise, check the value is in a range that makes sense for this
208    // extension.
209    uint64_t Value = CE->getValue();
210    return ((                                  Value <= 0x000000000000007FULL)||
211            (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
212            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
213  }
214  bool isImmSExti32i8() const {
215    if (!isImm())
216      return false;
217
218    // If this isn't a constant expr, just assume it fits and let relaxation
219    // handle it.
220    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
221    if (!CE)
222      return true;
223
224    // Otherwise, check the value is in a range that makes sense for this
225    // extension.
226    uint64_t Value = CE->getValue();
227    return ((                                  Value <= 0x000000000000007FULL)||
228            (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
229            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
230  }
231  bool isImmSExti64i8() const {
232    if (!isImm())
233      return false;
234
235    // If this isn't a constant expr, just assume it fits and let relaxation
236    // handle it.
237    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
238    if (!CE)
239      return true;
240
241    // Otherwise, check the value is in a range that makes sense for this
242    // extension.
243    uint64_t Value = CE->getValue();
244    return ((                                  Value <= 0x000000000000007FULL)||
245            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
246  }
247  bool isImmSExti64i32() const {
248    if (!isImm())
249      return false;
250
251    // If this isn't a constant expr, just assume it fits and let relaxation
252    // handle it.
253    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
254    if (!CE)
255      return true;
256
257    // Otherwise, check the value is in a range that makes sense for this
258    // extension.
259    uint64_t Value = CE->getValue();
260    return ((                                  Value <= 0x000000007FFFFFFFULL)||
261            (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
262  }
263
264  bool isMem() const { return Kind == Memory; }
265
266  bool isAbsMem() const {
267    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
268      !getMemIndexReg() && getMemScale() == 1;
269  }
270
271  bool isReg() const { return Kind == Register; }
272
273  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
274    // Add as immediates when possible.
275    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
276      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
277    else
278      Inst.addOperand(MCOperand::CreateExpr(Expr));
279  }
280
281  void addRegOperands(MCInst &Inst, unsigned N) const {
282    assert(N == 1 && "Invalid number of operands!");
283    Inst.addOperand(MCOperand::CreateReg(getReg()));
284  }
285
286  void addImmOperands(MCInst &Inst, unsigned N) const {
287    assert(N == 1 && "Invalid number of operands!");
288    addExpr(Inst, getImm());
289  }
290
291  void addMemOperands(MCInst &Inst, unsigned N) const {
292    assert((N == 5) && "Invalid number of operands!");
293    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
294    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
295    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
296    addExpr(Inst, getMemDisp());
297    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
298  }
299
300  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
301    assert((N == 1) && "Invalid number of operands!");
302    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
303  }
304
305  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
306    X86Operand *Res = new X86Operand(Token, Loc, Loc);
307    Res->Tok.Data = Str.data();
308    Res->Tok.Length = Str.size();
309    return Res;
310  }
311
312  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
313    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
314    Res->Reg.RegNo = RegNo;
315    return Res;
316  }
317
318  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
319    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
320    Res->Imm.Val = Val;
321    return Res;
322  }
323
324  /// Create an absolute memory operand.
325  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
326                               SMLoc EndLoc) {
327    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
328    Res->Mem.SegReg   = 0;
329    Res->Mem.Disp     = Disp;
330    Res->Mem.BaseReg  = 0;
331    Res->Mem.IndexReg = 0;
332    Res->Mem.Scale    = 1;
333    return Res;
334  }
335
336  /// Create a generalized memory operand.
337  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
338                               unsigned BaseReg, unsigned IndexReg,
339                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
340    // We should never just have a displacement, that should be parsed as an
341    // absolute memory operand.
342    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
343
344    // The scale should always be one of {1,2,4,8}.
345    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
346           "Invalid scale!");
347    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
348    Res->Mem.SegReg   = SegReg;
349    Res->Mem.Disp     = Disp;
350    Res->Mem.BaseReg  = BaseReg;
351    Res->Mem.IndexReg = IndexReg;
352    Res->Mem.Scale    = Scale;
353    return Res;
354  }
355};
356
357} // end anonymous namespace.
358
359
360bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
361                                    SMLoc &StartLoc, SMLoc &EndLoc) {
362  RegNo = 0;
363  const AsmToken &TokPercent = Parser.getTok();
364  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
365  StartLoc = TokPercent.getLoc();
366  Parser.Lex(); // Eat percent token.
367
368  const AsmToken &Tok = Parser.getTok();
369  if (Tok.isNot(AsmToken::Identifier))
370    return Error(Tok.getLoc(), "invalid register name");
371
372  // FIXME: Validate register for the current architecture; we have to do
373  // validation later, so maybe there is no need for this here.
374  RegNo = MatchRegisterName(Tok.getString());
375
376  // FIXME: This should be done using Requires<In32BitMode> and
377  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
378  // can be also checked.
379  if (RegNo == X86::RIZ && !Is64Bit)
380    return Error(Tok.getLoc(), "riz register in 64-bit mode only");
381
382  // Parse %st(1) and "%st" as "%st(0)"
383  if (RegNo == 0 && Tok.getString() == "st") {
384    RegNo = X86::ST0;
385    EndLoc = Tok.getLoc();
386    Parser.Lex(); // Eat 'st'
387
388    // Check to see if we have '(4)' after %st.
389    if (getLexer().isNot(AsmToken::LParen))
390      return false;
391    // Lex the paren.
392    getParser().Lex();
393
394    const AsmToken &IntTok = Parser.getTok();
395    if (IntTok.isNot(AsmToken::Integer))
396      return Error(IntTok.getLoc(), "expected stack index");
397    switch (IntTok.getIntVal()) {
398    case 0: RegNo = X86::ST0; break;
399    case 1: RegNo = X86::ST1; break;
400    case 2: RegNo = X86::ST2; break;
401    case 3: RegNo = X86::ST3; break;
402    case 4: RegNo = X86::ST4; break;
403    case 5: RegNo = X86::ST5; break;
404    case 6: RegNo = X86::ST6; break;
405    case 7: RegNo = X86::ST7; break;
406    default: return Error(IntTok.getLoc(), "invalid stack index");
407    }
408
409    if (getParser().Lex().isNot(AsmToken::RParen))
410      return Error(Parser.getTok().getLoc(), "expected ')'");
411
412    EndLoc = Tok.getLoc();
413    Parser.Lex(); // Eat ')'
414    return false;
415  }
416
417  // If this is "db[0-7]", match it as an alias
418  // for dr[0-7].
419  if (RegNo == 0 && Tok.getString().size() == 3 &&
420      Tok.getString().startswith("db")) {
421    switch (Tok.getString()[2]) {
422    case '0': RegNo = X86::DR0; break;
423    case '1': RegNo = X86::DR1; break;
424    case '2': RegNo = X86::DR2; break;
425    case '3': RegNo = X86::DR3; break;
426    case '4': RegNo = X86::DR4; break;
427    case '5': RegNo = X86::DR5; break;
428    case '6': RegNo = X86::DR6; break;
429    case '7': RegNo = X86::DR7; break;
430    }
431
432    if (RegNo != 0) {
433      EndLoc = Tok.getLoc();
434      Parser.Lex(); // Eat it.
435      return false;
436    }
437  }
438
439  if (RegNo == 0)
440    return Error(Tok.getLoc(), "invalid register name");
441
442  EndLoc = Tok.getLoc();
443  Parser.Lex(); // Eat identifier token.
444  return false;
445}
446
447X86Operand *X86ATTAsmParser::ParseOperand() {
448  switch (getLexer().getKind()) {
449  default:
450    // Parse a memory operand with no segment register.
451    return ParseMemOperand(0, Parser.getTok().getLoc());
452  case AsmToken::Percent: {
453    // Read the register.
454    unsigned RegNo;
455    SMLoc Start, End;
456    if (ParseRegister(RegNo, Start, End)) return 0;
457    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
458      Error(Start, "eiz and riz can only be used as index registers");
459      return 0;
460    }
461
462    // If this is a segment register followed by a ':', then this is the start
463    // of a memory reference, otherwise this is a normal register reference.
464    if (getLexer().isNot(AsmToken::Colon))
465      return X86Operand::CreateReg(RegNo, Start, End);
466
467
468    getParser().Lex(); // Eat the colon.
469    return ParseMemOperand(RegNo, Start);
470  }
471  case AsmToken::Dollar: {
472    // $42 -> immediate.
473    SMLoc Start = Parser.getTok().getLoc(), End;
474    Parser.Lex();
475    const MCExpr *Val;
476    if (getParser().ParseExpression(Val, End))
477      return 0;
478    return X86Operand::CreateImm(Val, Start, End);
479  }
480  }
481}
482
/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
/// has already been parsed if present.
///
/// \param SegReg   The segment register of an already-consumed prefix, or 0
///                 if there was none.
/// \param MemStart Location recorded as the start of the resulting operand.
/// \returns A newly allocated memory operand, or null after a diagnostic has
///          been emitted.
X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {

  // We have to disambiguate a parenthesized expression "(4+5)" from the start
  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
  // only way to do this without lookahead is to eat the '(' and see what is
  // after it.
  // The displacement defaults to the constant 0 when none is written.
  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
  if (getLexer().isNot(AsmToken::LParen)) {
    SMLoc ExprEnd;
    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;

    // After parsing the base expression we could either have a parenthesized
    // memory address or not.  If not, return now.  If so, eat the (.
    if (getLexer().isNot(AsmToken::LParen)) {
      // Without a segment register this is an absolute memory operand
      // (displacement only); with one, keep the segment on the operand.
      if (SegReg == 0)
        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
    }

    // Eat the '('.
    Parser.Lex();
  } else {
    // Okay, we have a '('.  We don't know if this is an expression or not, so
    // we have to eat the ( to see beyond it.
    SMLoc LParenLoc = Parser.getTok().getLoc();
    Parser.Lex(); // Eat the '('.

    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
      // Nothing to do here, fall into the code below with the '(' part of the
      // memory operand consumed.
    } else {
      SMLoc ExprEnd;

      // It must be a parenthesized expression, parse it now.
      if (getParser().ParseParenExpression(Disp, ExprEnd))
        return 0;

      // After parsing the base expression we could either have a parenthesized
      // memory address or not.  If not, return now.  If so, eat the (.
      if (getLexer().isNot(AsmToken::LParen)) {
        // Without a segment register this is an absolute memory operand
        // (displacement only).  Note this path uses LParenLoc rather than
        // MemStart as the start location.
        if (SegReg == 0)
          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
      }

      // Eat the '('.
      Parser.Lex();
    }
  }

  // If we reached here, then we just ate the ( of the memory operand.  Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;

  if (getLexer().is(AsmToken::Percent)) {
    // The same SMLoc is passed for both the start and the end location, so L
    // holds whichever one ParseRegister assigned last.
    SMLoc L;
    if (ParseRegister(BaseReg, L, L)) return 0;
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
      Error(L, "eiz and riz can only be used as index registers");
      return 0;
    }
  }

  if (getLexer().is(AsmToken::Comma)) {
    Parser.Lex(); // Eat the comma.

    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Note that even though it would be completely consistent to support syntax
    // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().is(AsmToken::Percent)) {
      SMLoc L;
      if (ParseRegister(IndexReg, L, L)) return 0;

      if (getLexer().isNot(AsmToken::RParen)) {
        // Parse the scale amount:
        //  ::= ',' [scale-expression]
        if (getLexer().isNot(AsmToken::Comma)) {
          Error(Parser.getTok().getLoc(),
                "expected comma in scale expression");
          return 0;
        }
        Parser.Lex(); // Eat the comma.

        // An empty scale ("(%eax,%ebx,)") leaves Scale at its default of 1.
        if (getLexer().isNot(AsmToken::RParen)) {
          SMLoc Loc = Parser.getTok().getLoc();

          int64_t ScaleVal;
          if (getParser().ParseAbsoluteExpression(ScaleVal))
            return 0;

          // Validate the scale amount.
          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
            return 0;
          }
          Scale = (unsigned)ScaleVal;
        }
      }
    } else if (getLexer().isNot(AsmToken::RParen)) {
      // A scale amount without an index register is parsed but ignored; a
      // warning is issued unless the value is 1.
      SMLoc Loc = Parser.getTok().getLoc();

      int64_t Value;
      if (getParser().ParseAbsoluteExpression(Value))
        return 0;

      if (Value != 1)
        Warning(Loc, "scale factor without index register is ignored");
      Scale = 1;
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (getLexer().isNot(AsmToken::RParen)) {
    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
    return 0;
  }
  SMLoc MemEnd = Parser.getTok().getLoc();
  Parser.Lex(); // Eat the ')'.

  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
                               MemStart, MemEnd);
}
614
615bool X86ATTAsmParser::
616ParseInstruction(StringRef Name, SMLoc NameLoc,
617                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
618  // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
619  // represent alternative syntaxes in the .td file, without requiring
620  // instruction duplication.
621  StringRef PatchedName = StringSwitch<StringRef>(Name)
622    .Case("sal", "shl")
623    .Case("salb", "shlb")
624    .Case("sall", "shll")
625    .Case("salq", "shlq")
626    .Case("salw", "shlw")
627    .Case("repe", "rep")
628    .Case("repz", "rep")
629    .Case("repnz", "repne")
630    .Case("iret", "iretl")
631    .Case("sysret", "sysretl")
632    .Case("push", Is64Bit ? "pushq" : "pushl")
633    .Case("pop", Is64Bit ? "popq" : "popl")
634    .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
635    .Case("popf",  Is64Bit ? "popfq"  : "popfl")
636    .Case("retl", Is64Bit ? "retl" : "ret")
637    .Case("retq", Is64Bit ? "ret" : "retq")
638    .Case("setz", "sete")
639    .Case("setnz", "setne")
640    .Case("jz", "je")
641    .Case("jnz", "jne")
642    .Case("jc", "jb")
643    .Case("jna", "jbe")
644    .Case("jnae", "jb")
645    .Case("jnb", "jae")
646    .Case("jnbe", "ja")
647    .Case("jnc", "jae")
648    .Case("jng", "jle")
649    .Case("jnge", "jl")
650    .Case("jnl", "jge")
651    .Case("jnle", "jg")
652    .Case("jpe", "jp")
653    .Case("jpo", "jnp")
654    // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
655    .Case("cmovcw",  "cmovbw") .Case("cmovcl",  "cmovbl")
656    .Case("cmovcq",  "cmovbq") .Case("cmovc",   "cmovb")
657    .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
658    .Case("cmovnaq", "cmovbeq").Case("cmovna",  "cmovbe")
659    .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
660    .Case("cmovnbq", "cmovaeq").Case("cmovnb",  "cmovae")
661    .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
662    .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
663    .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
664    .Case("cmovncq", "cmovaeq").Case("cmovnc",  "cmovae")
665    .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
666    .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
667    .Case("cmovnw",  "cmovgew").Case("cmovnl",  "cmovgel")
668    .Case("cmovnq",  "cmovgeq").Case("cmovn",   "cmovge")
669    .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
670    .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
671    .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
672    .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
673    .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
674    .Case("cmovnlq", "cmovgeq").Case("cmovnl",  "cmovge")
675    .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
676    .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
677    .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
678    .Case("cmovnzq", "cmovneq").Case("cmovnz",  "cmovne")
679    .Case("cmovzw",  "cmovew") .Case("cmovzl",  "cmovel")
680    .Case("cmovzq",  "cmoveq") .Case("cmovz",   "cmove")
681    .Case("fwait", "wait")
682    .Case("movzx", "movzb")
683    .Default(Name);
684
685  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
686  const MCExpr *ExtraImmOp = 0;
687  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
688      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
689       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
690    bool IsVCMP = PatchedName.startswith("vcmp");
691    unsigned SSECCIdx = IsVCMP ? 4 : 3;
692    unsigned SSEComparisonCode = StringSwitch<unsigned>(
693      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
694      .Case("eq",          0)
695      .Case("lt",          1)
696      .Case("le",          2)
697      .Case("unord",       3)
698      .Case("neq",         4)
699      .Case("nlt",         5)
700      .Case("nle",         6)
701      .Case("ord",         7)
702      .Case("eq_uq",       8)
703      .Case("nge",         9)
704      .Case("ngt",      0x0A)
705      .Case("false",    0x0B)
706      .Case("neq_oq",   0x0C)
707      .Case("ge",       0x0D)
708      .Case("gt",       0x0E)
709      .Case("true",     0x0F)
710      .Case("eq_os",    0x10)
711      .Case("lt_oq",    0x11)
712      .Case("le_oq",    0x12)
713      .Case("unord_s",  0x13)
714      .Case("neq_us",   0x14)
715      .Case("nlt_uq",   0x15)
716      .Case("nle_uq",   0x16)
717      .Case("ord_s",    0x17)
718      .Case("eq_us",    0x18)
719      .Case("nge_uq",   0x19)
720      .Case("ngt_uq",   0x1A)
721      .Case("false_os", 0x1B)
722      .Case("neq_os",   0x1C)
723      .Case("ge_oq",    0x1D)
724      .Case("gt_oq",    0x1E)
725      .Case("true_us",  0x1F)
726      .Default(~0U);
727    if (SSEComparisonCode != ~0U) {
728      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
729                                          getParser().getContext());
730      if (PatchedName.endswith("ss")) {
731        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
732      } else if (PatchedName.endswith("sd")) {
733        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
734      } else if (PatchedName.endswith("ps")) {
735        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
736      } else {
737        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
738        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
739      }
740    }
741  }
742
743  // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
744  if (PatchedName.startswith("vpclmul")) {
745    unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
746      PatchedName.slice(7, PatchedName.size() - 2))
747      .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
748      .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
749      .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
750      .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
751      .Default(~0U);
752    if (CLMULQuadWordSelect != ~0U) {
753      ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
754                                          getParser().getContext());
755      assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
756      PatchedName = "vpclmulqdq";
757    }
758  }
759
760  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
761
762  if (ExtraImmOp)
763    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
764
765
766  // Determine whether this is an instruction prefix.
767  bool isPrefix =
768    PatchedName == "lock" || PatchedName == "rep" ||
769    PatchedName == "repne";
770
771
772  // This does the actual operand parsing.  Don't parse any more if we have a
773  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
774  // just want to parse the "lock" as the first instruction and the "incl" as
775  // the next one.
776  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
777
778    // Parse '*' modifier.
779    if (getLexer().is(AsmToken::Star)) {
780      SMLoc Loc = Parser.getTok().getLoc();
781      Operands.push_back(X86Operand::CreateToken("*", Loc));
782      Parser.Lex(); // Eat the star.
783    }
784
785    // Read the first operand.
786    if (X86Operand *Op = ParseOperand())
787      Operands.push_back(Op);
788    else
789      return true;
790
791    while (getLexer().is(AsmToken::Comma)) {
792      Parser.Lex();  // Eat the comma.
793
794      // Parse and remember the operand.
795      if (X86Operand *Op = ParseOperand())
796        Operands.push_back(Op);
797      else
798        return true;
799    }
800
801    if (getLexer().isNot(AsmToken::EndOfStatement))
802      return TokError("unexpected token in argument list");
803  }
804
805  if (getLexer().is(AsmToken::EndOfStatement))
806    Parser.Lex(); // Consume the EndOfStatement
807
808  // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
809  if ((Name.startswith("shr") || Name.startswith("sar") ||
810       Name.startswith("shl")) &&
811      Operands.size() == 3) {
812    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
813    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
814        cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
815      delete Operands[1];
816      Operands.erase(Operands.begin() + 1);
817    }
818  }
819
820  // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
821  // "f{mul*,add*,sub*,div*} $op"
822  if ((Name.startswith("fmul") || Name.startswith("fadd") ||
823       Name.startswith("fsub") || Name.startswith("fdiv")) &&
824      Operands.size() == 3 &&
825      static_cast<X86Operand*>(Operands[2])->isReg() &&
826      static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
827    delete Operands[2];
828    Operands.erase(Operands.begin() + 2);
829  }
830
831  // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
832  // B".
833  if (Name.startswith("imul") && Operands.size() == 3 &&
834      static_cast<X86Operand*>(Operands[1])->isImm() &&
835      static_cast<X86Operand*>(Operands.back())->isReg()) {
836    X86Operand *Op = static_cast<X86Operand*>(Operands.back());
837    Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
838                                             Op->getEndLoc()));
839  }
840
841  // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
842  // effect (both store to a 16-bit mem).  Force to sldtw to avoid ambiguity
843  // errors, since its encoding is the most compact.
844  if (Name == "sldt" && Operands.size() == 2 &&
845      static_cast<X86Operand*>(Operands[1])->isMem()) {
846    delete Operands[0];
847    Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
848  }
849
850  // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
851  // synonyms.  Our tables only have the "<reg>, <mem>" form, so if we see the
852  // other operand order, swap them.
853  if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq")
854    if (Operands.size() == 3 &&
855        static_cast<X86Operand*>(Operands[1])->isMem() &&
856        static_cast<X86Operand*>(Operands[2])->isReg()) {
857      std::swap(Operands[1], Operands[2]);
858    }
859
860  // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
861  // synonyms.  Our tables only have the "<mem>, <reg>" form, so if we see the
862  // other operand order, swap them.
863  if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq")
864    if (Operands.size() == 3 &&
865        static_cast<X86Operand*>(Operands[1])->isReg() &&
866        static_cast<X86Operand*>(Operands[2])->isMem()) {
867      std::swap(Operands[1], Operands[2]);
868    }
869
870  return false;
871}
872
873bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
874  StringRef IDVal = DirectiveID.getIdentifier();
875  if (IDVal == ".word")
876    return ParseDirectiveWord(2, DirectiveID.getLoc());
877  return true;
878}
879
880/// ParseDirectiveWord
881///  ::= .word [ expression (, expression)* ]
882bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
883  if (getLexer().isNot(AsmToken::EndOfStatement)) {
884    for (;;) {
885      const MCExpr *Value;
886      if (getParser().ParseExpression(Value))
887        return true;
888
889      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
890
891      if (getLexer().is(AsmToken::EndOfStatement))
892        break;
893
894      // FIXME: Improve diagnostic.
895      if (getLexer().isNot(AsmToken::Comma))
896        return Error(L, "unexpected token in directive");
897      Parser.Lex();
898    }
899  }
900
901  Parser.Lex();
902  return false;
903}
904
905
906bool
907X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
908                                  const SmallVectorImpl<MCParsedAsmOperand*>
909                                    &Operands,
910                                  MCInst &Inst) {
911  assert(!Operands.empty() && "Unexpect empty operand list!");
912
913  bool WasOriginallyInvalidOperand = false;
914  unsigned OrigErrorInfo;
915
916  // First, try a direct match.
917  switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
918  case Match_Success:
919    return false;
920  case Match_MissingFeature:
921    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
922    return true;
923  case Match_InvalidOperand:
924    WasOriginallyInvalidOperand = true;
925    break;
926  case Match_MnemonicFail:
927    break;
928  }
929
930  // FIXME: Ideally, we would only attempt suffix matches for things which are
931  // valid prefixes, and we could just infer the right unambiguous
932  // type. However, that requires substantially more matcher support than the
933  // following hack.
934
935  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
936  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
937
938  // Change the operand to point to a temporary token.
939  StringRef Base = Op->getToken();
940  SmallString<16> Tmp;
941  Tmp += Base;
942  Tmp += ' ';
943  Op->setTokenValue(Tmp.str());
944
945  // Check for the various suffix matches.
946  Tmp[Base.size()] = 'b';
947  unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
948  MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
949  Tmp[Base.size()] = 'w';
950  MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
951  Tmp[Base.size()] = 'l';
952  MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
953  Tmp[Base.size()] = 'q';
954  MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
955
956  // Restore the old token.
957  Op->setTokenValue(Base);
958
959  // If exactly one matched, then we treat that as a successful match (and the
960  // instruction will already have been filled in correctly, since the failing
961  // matches won't have modified it).
962  unsigned NumSuccessfulMatches =
963    (MatchB == Match_Success) + (MatchW == Match_Success) +
964    (MatchL == Match_Success) + (MatchQ == Match_Success);
965  if (NumSuccessfulMatches == 1)
966    return false;
967
968  // Otherwise, the match failed, try to produce a decent error message.
969
970  // If we had multiple suffix matches, then identify this as an ambiguous
971  // match.
972  if (NumSuccessfulMatches > 1) {
973    char MatchChars[4];
974    unsigned NumMatches = 0;
975    if (MatchB == Match_Success)
976      MatchChars[NumMatches++] = 'b';
977    if (MatchW == Match_Success)
978      MatchChars[NumMatches++] = 'w';
979    if (MatchL == Match_Success)
980      MatchChars[NumMatches++] = 'l';
981    if (MatchQ == Match_Success)
982      MatchChars[NumMatches++] = 'q';
983
984    SmallString<126> Msg;
985    raw_svector_ostream OS(Msg);
986    OS << "ambiguous instructions require an explicit suffix (could be ";
987    for (unsigned i = 0; i != NumMatches; ++i) {
988      if (i != 0)
989        OS << ", ";
990      if (i + 1 == NumMatches)
991        OS << "or ";
992      OS << "'" << Base << MatchChars[i] << "'";
993    }
994    OS << ")";
995    Error(IDLoc, OS.str());
996    return true;
997  }
998
999  // Okay, we know that none of the variants matched successfully.
1000
1001  // If all of the instructions reported an invalid mnemonic, then the original
1002  // mnemonic was invalid.
1003  if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
1004      (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
1005    if (!WasOriginallyInvalidOperand) {
1006      Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
1007      return true;
1008    }
1009
1010    // Recover location info for the operand if we know which was the problem.
1011    SMLoc ErrorLoc = IDLoc;
1012    if (OrigErrorInfo != ~0U) {
1013      ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
1014      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
1015    }
1016
1017    Error(ErrorLoc, "invalid operand for instruction");
1018    return true;
1019  }
1020
1021  // If one instruction matched with a missing feature, report this as a
1022  // missing feature.
1023  if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
1024      (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
1025    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1026    return true;
1027  }
1028
1029  // If one instruction matched with an invalid operand, report this as an
1030  // operand failure.
1031  if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
1032      (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
1033    Error(IDLoc, "invalid operand for instruction");
1034    return true;
1035  }
1036
1037  // If all of these were an outright failure, report it in a useless way.
1038  // FIXME: We should give nicer diagnostics about the exact failure.
1039  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
1040  return true;
1041}
1042
1043
1044extern "C" void LLVMInitializeX86AsmLexer();
1045
1046// Force static initialization.
1047extern "C" void LLVMInitializeX86AsmParser() {
1048  RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
1049  RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
1050  LLVMInitializeX86AsmLexer();
1051}
1052
1053#define GET_REGISTER_MATCHER
1054#define GET_MATCHER_IMPLEMENTATION
1055#include "X86GenAsmMatcher.inc"
1056