X86AsmParser.cpp revision beb6898df8f96ccea4ae147587479b507bb3e491
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Target/TargetAsmParser.h"
11#include "X86.h"
12#include "X86Subtarget.h"
13#include "llvm/Target/TargetRegistry.h"
14#include "llvm/Target/TargetAsmParser.h"
15#include "llvm/MC/MCStreamer.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/MC/MCParser/MCAsmLexer.h"
19#include "llvm/MC/MCParser/MCAsmParser.h"
20#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/ADT/StringSwitch.h"
25#include "llvm/ADT/Twine.h"
26#include "llvm/Support/SourceMgr.h"
27#include "llvm/Support/raw_ostream.h"
28using namespace llvm;
29
30namespace {
31struct X86Operand;
32
33class X86ATTAsmParser : public TargetAsmParser {
34  MCAsmParser &Parser;
35  TargetMachine &TM;
36
37protected:
38  unsigned Is64Bit : 1;
39
40private:
41  MCAsmParser &getParser() const { return Parser; }
42
43  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
44
45  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
46
47  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
48
49  X86Operand *ParseOperand();
50  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
51
52  bool ParseDirectiveWord(unsigned Size, SMLoc L);
53
54  bool MatchAndEmitInstruction(SMLoc IDLoc,
55                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
56                               MCStreamer &Out);
57
58  /// @name Auto-generated Matcher Functions
59  /// {
60
61#define GET_ASSEMBLER_HEADER
62#include "X86GenAsmMatcher.inc"
63
64  /// }
65
66public:
67  X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
68    : TargetAsmParser(T), Parser(parser), TM(TM) {
69
70    // Initialize the set of available features.
71    setAvailableFeatures(ComputeAvailableFeatures(
72                           &TM.getSubtarget<X86Subtarget>()));
73  }
74
75  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
76                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
77
78  virtual bool ParseDirective(AsmToken DirectiveID);
79};
80
81class X86_32ATTAsmParser : public X86ATTAsmParser {
82public:
83  X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
84    : X86ATTAsmParser(T, Parser, TM) {
85    Is64Bit = false;
86  }
87};
88
89class X86_64ATTAsmParser : public X86ATTAsmParser {
90public:
91  X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
92    : X86ATTAsmParser(T, Parser, TM) {
93    Is64Bit = true;
94  }
95};
96
97} // end anonymous namespace
98
99/// @name Auto-generated Match Functions
100/// {
101
102static unsigned MatchRegisterName(StringRef Name);
103
104/// }
105
106namespace {
107
108/// X86Operand - Instances of this class represent a parsed X86 machine
109/// instruction.
110struct X86Operand : public MCParsedAsmOperand {
111  enum KindTy {
112    Token,
113    Register,
114    Immediate,
115    Memory
116  } Kind;
117
118  SMLoc StartLoc, EndLoc;
119
120  union {
121    struct {
122      const char *Data;
123      unsigned Length;
124    } Tok;
125
126    struct {
127      unsigned RegNo;
128    } Reg;
129
130    struct {
131      const MCExpr *Val;
132    } Imm;
133
134    struct {
135      unsigned SegReg;
136      const MCExpr *Disp;
137      unsigned BaseReg;
138      unsigned IndexReg;
139      unsigned Scale;
140    } Mem;
141  };
142
143  X86Operand(KindTy K, SMLoc Start, SMLoc End)
144    : Kind(K), StartLoc(Start), EndLoc(End) {}
145
146  /// getStartLoc - Get the location of the first token of this operand.
147  SMLoc getStartLoc() const { return StartLoc; }
148  /// getEndLoc - Get the location of the last token of this operand.
149  SMLoc getEndLoc() const { return EndLoc; }
150
151  virtual void dump(raw_ostream &OS) const {}
152
153  StringRef getToken() const {
154    assert(Kind == Token && "Invalid access!");
155    return StringRef(Tok.Data, Tok.Length);
156  }
157  void setTokenValue(StringRef Value) {
158    assert(Kind == Token && "Invalid access!");
159    Tok.Data = Value.data();
160    Tok.Length = Value.size();
161  }
162
163  unsigned getReg() const {
164    assert(Kind == Register && "Invalid access!");
165    return Reg.RegNo;
166  }
167
168  const MCExpr *getImm() const {
169    assert(Kind == Immediate && "Invalid access!");
170    return Imm.Val;
171  }
172
173  const MCExpr *getMemDisp() const {
174    assert(Kind == Memory && "Invalid access!");
175    return Mem.Disp;
176  }
177  unsigned getMemSegReg() const {
178    assert(Kind == Memory && "Invalid access!");
179    return Mem.SegReg;
180  }
181  unsigned getMemBaseReg() const {
182    assert(Kind == Memory && "Invalid access!");
183    return Mem.BaseReg;
184  }
185  unsigned getMemIndexReg() const {
186    assert(Kind == Memory && "Invalid access!");
187    return Mem.IndexReg;
188  }
189  unsigned getMemScale() const {
190    assert(Kind == Memory && "Invalid access!");
191    return Mem.Scale;
192  }
193
194  bool isToken() const {return Kind == Token; }
195
196  bool isImm() const { return Kind == Immediate; }
197
198  bool isImmSExti16i8() const {
199    if (!isImm())
200      return false;
201
202    // If this isn't a constant expr, just assume it fits and let relaxation
203    // handle it.
204    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
205    if (!CE)
206      return true;
207
208    // Otherwise, check the value is in a range that makes sense for this
209    // extension.
210    uint64_t Value = CE->getValue();
211    return ((                                  Value <= 0x000000000000007FULL)||
212            (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
213            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
214  }
215  bool isImmSExti32i8() const {
216    if (!isImm())
217      return false;
218
219    // If this isn't a constant expr, just assume it fits and let relaxation
220    // handle it.
221    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
222    if (!CE)
223      return true;
224
225    // Otherwise, check the value is in a range that makes sense for this
226    // extension.
227    uint64_t Value = CE->getValue();
228    return ((                                  Value <= 0x000000000000007FULL)||
229            (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
230            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
231  }
232  bool isImmSExti64i8() const {
233    if (!isImm())
234      return false;
235
236    // If this isn't a constant expr, just assume it fits and let relaxation
237    // handle it.
238    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
239    if (!CE)
240      return true;
241
242    // Otherwise, check the value is in a range that makes sense for this
243    // extension.
244    uint64_t Value = CE->getValue();
245    return ((                                  Value <= 0x000000000000007FULL)||
246            (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
247  }
248  bool isImmSExti64i32() const {
249    if (!isImm())
250      return false;
251
252    // If this isn't a constant expr, just assume it fits and let relaxation
253    // handle it.
254    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
255    if (!CE)
256      return true;
257
258    // Otherwise, check the value is in a range that makes sense for this
259    // extension.
260    uint64_t Value = CE->getValue();
261    return ((                                  Value <= 0x000000007FFFFFFFULL)||
262            (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
263  }
264
265  bool isMem() const { return Kind == Memory; }
266
267  bool isAbsMem() const {
268    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
269      !getMemIndexReg() && getMemScale() == 1;
270  }
271
272  bool isReg() const { return Kind == Register; }
273
274  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
275    // Add as immediates when possible.
276    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
277      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
278    else
279      Inst.addOperand(MCOperand::CreateExpr(Expr));
280  }
281
282  void addRegOperands(MCInst &Inst, unsigned N) const {
283    assert(N == 1 && "Invalid number of operands!");
284    Inst.addOperand(MCOperand::CreateReg(getReg()));
285  }
286
287  void addImmOperands(MCInst &Inst, unsigned N) const {
288    assert(N == 1 && "Invalid number of operands!");
289    addExpr(Inst, getImm());
290  }
291
292  void addMemOperands(MCInst &Inst, unsigned N) const {
293    assert((N == 5) && "Invalid number of operands!");
294    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
295    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
296    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
297    addExpr(Inst, getMemDisp());
298    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
299  }
300
301  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
302    assert((N == 1) && "Invalid number of operands!");
303    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
304  }
305
306  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
307    X86Operand *Res = new X86Operand(Token, Loc, Loc);
308    Res->Tok.Data = Str.data();
309    Res->Tok.Length = Str.size();
310    return Res;
311  }
312
313  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
314    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
315    Res->Reg.RegNo = RegNo;
316    return Res;
317  }
318
319  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
320    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
321    Res->Imm.Val = Val;
322    return Res;
323  }
324
325  /// Create an absolute memory operand.
326  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
327                               SMLoc EndLoc) {
328    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
329    Res->Mem.SegReg   = 0;
330    Res->Mem.Disp     = Disp;
331    Res->Mem.BaseReg  = 0;
332    Res->Mem.IndexReg = 0;
333    Res->Mem.Scale    = 1;
334    return Res;
335  }
336
337  /// Create a generalized memory operand.
338  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
339                               unsigned BaseReg, unsigned IndexReg,
340                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
341    // We should never just have a displacement, that should be parsed as an
342    // absolute memory operand.
343    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
344
345    // The scale should always be one of {1,2,4,8}.
346    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
347           "Invalid scale!");
348    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
349    Res->Mem.SegReg   = SegReg;
350    Res->Mem.Disp     = Disp;
351    Res->Mem.BaseReg  = BaseReg;
352    Res->Mem.IndexReg = IndexReg;
353    Res->Mem.Scale    = Scale;
354    return Res;
355  }
356};
357
358} // end anonymous namespace.
359
360
361bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
362                                    SMLoc &StartLoc, SMLoc &EndLoc) {
363  RegNo = 0;
364  const AsmToken &TokPercent = Parser.getTok();
365  assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
366  StartLoc = TokPercent.getLoc();
367  Parser.Lex(); // Eat percent token.
368
369  const AsmToken &Tok = Parser.getTok();
370  if (Tok.isNot(AsmToken::Identifier))
371    return Error(Tok.getLoc(), "invalid register name");
372
373  // FIXME: Validate register for the current architecture; we have to do
374  // validation later, so maybe there is no need for this here.
375  RegNo = MatchRegisterName(Tok.getString());
376
377  // If the match failed, try the register name as lowercase.
378  if (RegNo == 0)
379    RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
380
381  // FIXME: This should be done using Requires<In32BitMode> and
382  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
383  // can be also checked.
384  if (RegNo == X86::RIZ && !Is64Bit)
385    return Error(Tok.getLoc(), "riz register in 64-bit mode only");
386
387  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
388  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
389    RegNo = X86::ST0;
390    EndLoc = Tok.getLoc();
391    Parser.Lex(); // Eat 'st'
392
393    // Check to see if we have '(4)' after %st.
394    if (getLexer().isNot(AsmToken::LParen))
395      return false;
396    // Lex the paren.
397    getParser().Lex();
398
399    const AsmToken &IntTok = Parser.getTok();
400    if (IntTok.isNot(AsmToken::Integer))
401      return Error(IntTok.getLoc(), "expected stack index");
402    switch (IntTok.getIntVal()) {
403    case 0: RegNo = X86::ST0; break;
404    case 1: RegNo = X86::ST1; break;
405    case 2: RegNo = X86::ST2; break;
406    case 3: RegNo = X86::ST3; break;
407    case 4: RegNo = X86::ST4; break;
408    case 5: RegNo = X86::ST5; break;
409    case 6: RegNo = X86::ST6; break;
410    case 7: RegNo = X86::ST7; break;
411    default: return Error(IntTok.getLoc(), "invalid stack index");
412    }
413
414    if (getParser().Lex().isNot(AsmToken::RParen))
415      return Error(Parser.getTok().getLoc(), "expected ')'");
416
417    EndLoc = Tok.getLoc();
418    Parser.Lex(); // Eat ')'
419    return false;
420  }
421
422  // If this is "db[0-7]", match it as an alias
423  // for dr[0-7].
424  if (RegNo == 0 && Tok.getString().size() == 3 &&
425      Tok.getString().startswith("db")) {
426    switch (Tok.getString()[2]) {
427    case '0': RegNo = X86::DR0; break;
428    case '1': RegNo = X86::DR1; break;
429    case '2': RegNo = X86::DR2; break;
430    case '3': RegNo = X86::DR3; break;
431    case '4': RegNo = X86::DR4; break;
432    case '5': RegNo = X86::DR5; break;
433    case '6': RegNo = X86::DR6; break;
434    case '7': RegNo = X86::DR7; break;
435    }
436
437    if (RegNo != 0) {
438      EndLoc = Tok.getLoc();
439      Parser.Lex(); // Eat it.
440      return false;
441    }
442  }
443
444  if (RegNo == 0)
445    return Error(Tok.getLoc(), "invalid register name");
446
447  EndLoc = Tok.getLoc();
448  Parser.Lex(); // Eat identifier token.
449  return false;
450}
451
452X86Operand *X86ATTAsmParser::ParseOperand() {
453  switch (getLexer().getKind()) {
454  default:
455    // Parse a memory operand with no segment register.
456    return ParseMemOperand(0, Parser.getTok().getLoc());
457  case AsmToken::Percent: {
458    // Read the register.
459    unsigned RegNo;
460    SMLoc Start, End;
461    if (ParseRegister(RegNo, Start, End)) return 0;
462    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
463      Error(Start, "eiz and riz can only be used as index registers");
464      return 0;
465    }
466
467    // If this is a segment register followed by a ':', then this is the start
468    // of a memory reference, otherwise this is a normal register reference.
469    if (getLexer().isNot(AsmToken::Colon))
470      return X86Operand::CreateReg(RegNo, Start, End);
471
472
473    getParser().Lex(); // Eat the colon.
474    return ParseMemOperand(RegNo, Start);
475  }
476  case AsmToken::Dollar: {
477    // $42 -> immediate.
478    SMLoc Start = Parser.getTok().getLoc(), End;
479    Parser.Lex();
480    const MCExpr *Val;
481    if (getParser().ParseExpression(Val, End))
482      return 0;
483    return X86Operand::CreateImm(Val, Start, End);
484  }
485  }
486}
487
488/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
489/// has already been parsed if present.
490X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
491
492  // We have to disambiguate a parenthesized expression "(4+5)" from the start
493  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
494  // only way to do this without lookahead is to eat the '(' and see what is
495  // after it.
496  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
497  if (getLexer().isNot(AsmToken::LParen)) {
498    SMLoc ExprEnd;
499    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
500
501    // After parsing the base expression we could either have a parenthesized
502    // memory address or not.  If not, return now.  If so, eat the (.
503    if (getLexer().isNot(AsmToken::LParen)) {
504      // Unless we have a segment register, treat this as an immediate.
505      if (SegReg == 0)
506        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
507      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
508    }
509
510    // Eat the '('.
511    Parser.Lex();
512  } else {
513    // Okay, we have a '('.  We don't know if this is an expression or not, but
514    // so we have to eat the ( to see beyond it.
515    SMLoc LParenLoc = Parser.getTok().getLoc();
516    Parser.Lex(); // Eat the '('.
517
518    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
519      // Nothing to do here, fall into the code below with the '(' part of the
520      // memory operand consumed.
521    } else {
522      SMLoc ExprEnd;
523
524      // It must be an parenthesized expression, parse it now.
525      if (getParser().ParseParenExpression(Disp, ExprEnd))
526        return 0;
527
528      // After parsing the base expression we could either have a parenthesized
529      // memory address or not.  If not, return now.  If so, eat the (.
530      if (getLexer().isNot(AsmToken::LParen)) {
531        // Unless we have a segment register, treat this as an immediate.
532        if (SegReg == 0)
533          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
534        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
535      }
536
537      // Eat the '('.
538      Parser.Lex();
539    }
540  }
541
542  // If we reached here, then we just ate the ( of the memory operand.  Process
543  // the rest of the memory operand.
544  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
545
546  if (getLexer().is(AsmToken::Percent)) {
547    SMLoc L;
548    if (ParseRegister(BaseReg, L, L)) return 0;
549    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
550      Error(L, "eiz and riz can only be used as index registers");
551      return 0;
552    }
553  }
554
555  if (getLexer().is(AsmToken::Comma)) {
556    Parser.Lex(); // Eat the comma.
557
558    // Following the comma we should have either an index register, or a scale
559    // value. We don't support the later form, but we want to parse it
560    // correctly.
561    //
562    // Not that even though it would be completely consistent to support syntax
563    // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
564    if (getLexer().is(AsmToken::Percent)) {
565      SMLoc L;
566      if (ParseRegister(IndexReg, L, L)) return 0;
567
568      if (getLexer().isNot(AsmToken::RParen)) {
569        // Parse the scale amount:
570        //  ::= ',' [scale-expression]
571        if (getLexer().isNot(AsmToken::Comma)) {
572          Error(Parser.getTok().getLoc(),
573                "expected comma in scale expression");
574          return 0;
575        }
576        Parser.Lex(); // Eat the comma.
577
578        if (getLexer().isNot(AsmToken::RParen)) {
579          SMLoc Loc = Parser.getTok().getLoc();
580
581          int64_t ScaleVal;
582          if (getParser().ParseAbsoluteExpression(ScaleVal))
583            return 0;
584
585          // Validate the scale amount.
586          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
587            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
588            return 0;
589          }
590          Scale = (unsigned)ScaleVal;
591        }
592      }
593    } else if (getLexer().isNot(AsmToken::RParen)) {
594      // A scale amount without an index is ignored.
595      // index.
596      SMLoc Loc = Parser.getTok().getLoc();
597
598      int64_t Value;
599      if (getParser().ParseAbsoluteExpression(Value))
600        return 0;
601
602      if (Value != 1)
603        Warning(Loc, "scale factor without index register is ignored");
604      Scale = 1;
605    }
606  }
607
608  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
609  if (getLexer().isNot(AsmToken::RParen)) {
610    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
611    return 0;
612  }
613  SMLoc MemEnd = Parser.getTok().getLoc();
614  Parser.Lex(); // Eat the ')'.
615
616  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
617                               MemStart, MemEnd);
618}
619
620bool X86ATTAsmParser::
621ParseInstruction(StringRef Name, SMLoc NameLoc,
622                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
623  StringRef PatchedName = Name;
624
625  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
626  const MCExpr *ExtraImmOp = 0;
627  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
628      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
629       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
630    bool IsVCMP = PatchedName.startswith("vcmp");
631    unsigned SSECCIdx = IsVCMP ? 4 : 3;
632    unsigned SSEComparisonCode = StringSwitch<unsigned>(
633      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
634      .Case("eq",          0)
635      .Case("lt",          1)
636      .Case("le",          2)
637      .Case("unord",       3)
638      .Case("neq",         4)
639      .Case("nlt",         5)
640      .Case("nle",         6)
641      .Case("ord",         7)
642      .Case("eq_uq",       8)
643      .Case("nge",         9)
644      .Case("ngt",      0x0A)
645      .Case("false",    0x0B)
646      .Case("neq_oq",   0x0C)
647      .Case("ge",       0x0D)
648      .Case("gt",       0x0E)
649      .Case("true",     0x0F)
650      .Case("eq_os",    0x10)
651      .Case("lt_oq",    0x11)
652      .Case("le_oq",    0x12)
653      .Case("unord_s",  0x13)
654      .Case("neq_us",   0x14)
655      .Case("nlt_uq",   0x15)
656      .Case("nle_uq",   0x16)
657      .Case("ord_s",    0x17)
658      .Case("eq_us",    0x18)
659      .Case("nge_uq",   0x19)
660      .Case("ngt_uq",   0x1A)
661      .Case("false_os", 0x1B)
662      .Case("neq_os",   0x1C)
663      .Case("ge_oq",    0x1D)
664      .Case("gt_oq",    0x1E)
665      .Case("true_us",  0x1F)
666      .Default(~0U);
667    if (SSEComparisonCode != ~0U) {
668      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
669                                          getParser().getContext());
670      if (PatchedName.endswith("ss")) {
671        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
672      } else if (PatchedName.endswith("sd")) {
673        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
674      } else if (PatchedName.endswith("ps")) {
675        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
676      } else {
677        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
678        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
679      }
680    }
681  }
682
683  // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
684  if (PatchedName.startswith("vpclmul")) {
685    unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
686      PatchedName.slice(7, PatchedName.size() - 2))
687      .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
688      .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
689      .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
690      .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
691      .Default(~0U);
692    if (CLMULQuadWordSelect != ~0U) {
693      ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
694                                          getParser().getContext());
695      assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
696      PatchedName = "vpclmulqdq";
697    }
698  }
699
700  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
701
702  if (ExtraImmOp)
703    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
704
705
706  // Determine whether this is an instruction prefix.
707  bool isPrefix =
708    Name == "lock" || Name == "rep" ||
709    Name == "repe" || Name == "repz" ||
710    Name == "repne" || Name == "repnz" ||
711    Name == "rex64";
712
713
714  // This does the actual operand parsing.  Don't parse any more if we have a
715  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
716  // just want to parse the "lock" as the first instruction and the "incl" as
717  // the next one.
718  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
719
720    // Parse '*' modifier.
721    if (getLexer().is(AsmToken::Star)) {
722      SMLoc Loc = Parser.getTok().getLoc();
723      Operands.push_back(X86Operand::CreateToken("*", Loc));
724      Parser.Lex(); // Eat the star.
725    }
726
727    // Read the first operand.
728    if (X86Operand *Op = ParseOperand())
729      Operands.push_back(Op);
730    else {
731      Parser.EatToEndOfStatement();
732      return true;
733    }
734
735    while (getLexer().is(AsmToken::Comma)) {
736      Parser.Lex();  // Eat the comma.
737
738      // Parse and remember the operand.
739      if (X86Operand *Op = ParseOperand())
740        Operands.push_back(Op);
741      else {
742        Parser.EatToEndOfStatement();
743        return true;
744      }
745    }
746
747    if (getLexer().isNot(AsmToken::EndOfStatement)) {
748      SMLoc Loc = getLexer().getLoc();
749      Parser.EatToEndOfStatement();
750      return Error(Loc, "unexpected token in argument list");
751    }
752  }
753
754  if (getLexer().is(AsmToken::EndOfStatement))
755    Parser.Lex(); // Consume the EndOfStatement
756
757  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
758  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
759  // documented form in various unofficial manuals, so a lot of code uses it.
760  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
761      Operands.size() == 3) {
762    X86Operand &Op = *(X86Operand*)Operands.back();
763    if (Op.isMem() && Op.Mem.SegReg == 0 &&
764        isa<MCConstantExpr>(Op.Mem.Disp) &&
765        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
766        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
767      SMLoc Loc = Op.getEndLoc();
768      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
769      delete &Op;
770    }
771  }
772
773  // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
774  // "shift <op>".
775  if ((Name.startswith("shr") || Name.startswith("sar") ||
776       Name.startswith("shl") || Name.startswith("sal") ||
777       Name.startswith("rcl") || Name.startswith("rcr") ||
778       Name.startswith("rol") || Name.startswith("ror")) &&
779      Operands.size() == 3) {
780    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
781    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
782        cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
783      delete Operands[1];
784      Operands.erase(Operands.begin() + 1);
785    }
786  }
787
788  return false;
789}
790
791bool X86ATTAsmParser::
792MatchAndEmitInstruction(SMLoc IDLoc,
793                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
794                        MCStreamer &Out) {
795  assert(!Operands.empty() && "Unexpect empty operand list!");
796  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
797  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
798
799  // First, handle aliases that expand to multiple instructions.
800  // FIXME: This should be replaced with a real .td file alias mechanism.
801  // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
802  // call.
803  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
804      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
805      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
806      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
807    MCInst Inst;
808    Inst.setOpcode(X86::WAIT);
809    Out.EmitInstruction(Inst);
810
811    const char *Repl =
812      StringSwitch<const char*>(Op->getToken())
813        .Case("finit",  "fninit")
814        .Case("fsave",  "fnsave")
815        .Case("fstcw",  "fnstcw")
816        .Case("fstcww",  "fnstcw")
817        .Case("fstenv", "fnstenv")
818        .Case("fstsw",  "fnstsw")
819        .Case("fstsww", "fnstsw")
820        .Case("fclex",  "fnclex")
821        .Default(0);
822    assert(Repl && "Unknown wait-prefixed instruction");
823    delete Operands[0];
824    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
825  }
826
827  bool WasOriginallyInvalidOperand = false;
828  unsigned OrigErrorInfo;
829  MCInst Inst;
830
831  // First, try a direct match.
832  switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
833  case Match_Success:
834    Out.EmitInstruction(Inst);
835    return false;
836  case Match_MissingFeature:
837    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
838    return true;
839  case Match_InvalidOperand:
840    WasOriginallyInvalidOperand = true;
841    break;
842  case Match_MnemonicFail:
843    break;
844  }
845
846  // FIXME: Ideally, we would only attempt suffix matches for things which are
847  // valid prefixes, and we could just infer the right unambiguous
848  // type. However, that requires substantially more matcher support than the
849  // following hack.
850
851  // Change the operand to point to a temporary token.
852  StringRef Base = Op->getToken();
853  SmallString<16> Tmp;
854  Tmp += Base;
855  Tmp += ' ';
856  Op->setTokenValue(Tmp.str());
857
858  // If this instruction starts with an 'f', then it is a floating point stack
859  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
860  // 80-bit floating point, which use the suffixes s,l,t respectively.
861  //
862  // Otherwise, we assume that this may be an integer instruction, which comes
863  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
864  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
865
866  // Check for the various suffix matches.
867  Tmp[Base.size()] = Suffixes[0];
868  unsigned ErrorInfoIgnore;
869  MatchResultTy Match1, Match2, Match3, Match4;
870
871  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
872  Tmp[Base.size()] = Suffixes[1];
873  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
874  Tmp[Base.size()] = Suffixes[2];
875  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
876  Tmp[Base.size()] = Suffixes[3];
877  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
878
879  // Restore the old token.
880  Op->setTokenValue(Base);
881
882  // If exactly one matched, then we treat that as a successful match (and the
883  // instruction will already have been filled in correctly, since the failing
884  // matches won't have modified it).
885  unsigned NumSuccessfulMatches =
886    (Match1 == Match_Success) + (Match2 == Match_Success) +
887    (Match3 == Match_Success) + (Match4 == Match_Success);
888  if (NumSuccessfulMatches == 1) {
889    Out.EmitInstruction(Inst);
890    return false;
891  }
892
893  // Otherwise, the match failed, try to produce a decent error message.
894
895  // If we had multiple suffix matches, then identify this as an ambiguous
896  // match.
897  if (NumSuccessfulMatches > 1) {
898    char MatchChars[4];
899    unsigned NumMatches = 0;
900    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
901    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
902    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
903    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
904
905    SmallString<126> Msg;
906    raw_svector_ostream OS(Msg);
907    OS << "ambiguous instructions require an explicit suffix (could be ";
908    for (unsigned i = 0; i != NumMatches; ++i) {
909      if (i != 0)
910        OS << ", ";
911      if (i + 1 == NumMatches)
912        OS << "or ";
913      OS << "'" << Base << MatchChars[i] << "'";
914    }
915    OS << ")";
916    Error(IDLoc, OS.str());
917    return true;
918  }
919
920  // Okay, we know that none of the variants matched successfully.
921
922  // If all of the instructions reported an invalid mnemonic, then the original
923  // mnemonic was invalid.
924  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
925      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
926    if (!WasOriginallyInvalidOperand) {
927      Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
928      return true;
929    }
930
931    // Recover location info for the operand if we know which was the problem.
932    SMLoc ErrorLoc = IDLoc;
933    if (OrigErrorInfo != ~0U) {
934      if (OrigErrorInfo >= Operands.size())
935        return Error(IDLoc, "too few operands for instruction");
936
937      ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
938      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
939    }
940
941    return Error(ErrorLoc, "invalid operand for instruction");
942  }
943
944  // If one instruction matched with a missing feature, report this as a
945  // missing feature.
946  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
947      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
948    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
949    return true;
950  }
951
952  // If one instruction matched with an invalid operand, report this as an
953  // operand failure.
954  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
955      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
956    Error(IDLoc, "invalid operand for instruction");
957    return true;
958  }
959
960  // If all of these were an outright failure, report it in a useless way.
961  // FIXME: We should give nicer diagnostics about the exact failure.
962  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
963  return true;
964}
965
966
967bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
968  StringRef IDVal = DirectiveID.getIdentifier();
969  if (IDVal == ".word")
970    return ParseDirectiveWord(2, DirectiveID.getLoc());
971  return true;
972}
973
974/// ParseDirectiveWord
975///  ::= .word [ expression (, expression)* ]
976bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
977  if (getLexer().isNot(AsmToken::EndOfStatement)) {
978    for (;;) {
979      const MCExpr *Value;
980      if (getParser().ParseExpression(Value))
981        return true;
982
983      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
984
985      if (getLexer().is(AsmToken::EndOfStatement))
986        break;
987
988      // FIXME: Improve diagnostic.
989      if (getLexer().isNot(AsmToken::Comma))
990        return Error(L, "unexpected token in directive");
991      Parser.Lex();
992    }
993  }
994
995  Parser.Lex();
996  return false;
997}
998
999
1000
1001
1002extern "C" void LLVMInitializeX86AsmLexer();
1003
1004// Force static initialization.
1005extern "C" void LLVMInitializeX86AsmParser() {
1006  RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
1007  RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
1008  LLVMInitializeX86AsmLexer();
1009}
1010
1011#define GET_REGISTER_MATCHER
1012#define GET_MATCHER_IMPLEMENTATION
1013#include "X86GenAsmMatcher.inc"
1014