X86AsmParser.cpp revision a28101e61aa3aeed5baf3d5b91d0f8bcb4e9e12a
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "MCTargetDesc/X86BaseInfo.h"
11#include "llvm/MC/MCTargetAsmParser.h"
12#include "llvm/MC/MCStreamer.h"
13#include "llvm/MC/MCExpr.h"
14#include "llvm/MC/MCInst.h"
15#include "llvm/MC/MCRegisterInfo.h"
16#include "llvm/MC/MCSubtargetInfo.h"
17#include "llvm/MC/MCParser/MCAsmLexer.h"
18#include "llvm/MC/MCParser/MCAsmParser.h"
19#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
20#include "llvm/ADT/OwningPtr.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSwitch.h"
24#include "llvm/ADT/Twine.h"
25#include "llvm/Support/SourceMgr.h"
26#include "llvm/Support/TargetRegistry.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31namespace {
32struct X86Operand;
33
34class X86AsmParser : public MCTargetAsmParser {
35  MCSubtargetInfo &STI;
36  MCAsmParser &Parser;
37
38private:
39  MCAsmParser &getParser() const { return Parser; }
40
41  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
42
43  bool Error(SMLoc L, const Twine &Msg,
44             ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
45    return Parser.Error(L, Msg, Ranges);
46  }
47
48  X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
49    Error(Loc, Msg);
50    return 0;
51  }
52
53  X86Operand *ParseOperand();
54  X86Operand *ParseATTOperand();
55  X86Operand *ParseIntelOperand();
56  X86Operand *ParseIntelMemOperand();
57  X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
58  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
59
60  bool ParseDirectiveWord(unsigned Size, SMLoc L);
61  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
62
63  bool processInstruction(MCInst &Inst,
64                          const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
65
66  bool MatchAndEmitInstruction(SMLoc IDLoc,
67                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
68                               MCStreamer &Out);
69
70  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
71  /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
72  bool isSrcOp(X86Operand &Op);
73
74  /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
75  /// or %es:(%edi) in 32bit mode.
76  bool isDstOp(X86Operand &Op);
77
78  bool is64BitMode() const {
79    // FIXME: Can tablegen auto-generate this?
80    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
81  }
82  void SwitchMode() {
83    unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
84    setAvailableFeatures(FB);
85  }
86
87  /// @name Auto-generated Matcher Functions
88  /// {
89
90#define GET_ASSEMBLER_HEADER
91#include "X86GenAsmMatcher.inc"
92
93  /// }
94
95public:
96  X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
97    : MCTargetAsmParser(), STI(sti), Parser(parser) {
98
99    // Initialize the set of available features.
100    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
101  }
102  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
103
104  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
105                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
106
107  virtual bool ParseDirective(AsmToken DirectiveID);
108};
109} // end anonymous namespace
110
/// @name Auto-generated Match Functions
/// {

/// MatchRegisterName - Look up a register number from its name.
/// ParseRegister treats a result of 0 as "no register matched". The
/// definition is not in this part of the file (presumably it comes from the
/// tablegen-generated matcher -- confirm).
static unsigned MatchRegisterName(StringRef Name);

/// }
117
/// isImmSExti16i8Value - Return true if \p Value is what a signed 8-bit
/// immediate looks like after sign extension, viewed either as a 16-bit or as
/// a 64-bit quantity.
static bool isImmSExti16i8Value(uint64_t Value) {
  // Non-negative i8: [0, 0x7F].
  if (Value <= 0x000000000000007FULL)
    return true;

  // Negative i8 sign-extended to 16 bits: [0xFF80, 0xFFFF].
  if (Value >= 0x000000000000FF80ULL && Value <= 0x000000000000FFFFULL)
    return true;

  // Negative i8 sign-extended to 64 bits: everything from ...FF80 upwards.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
123
/// isImmSExti32i8Value - Return true if \p Value is what a signed 8-bit
/// immediate looks like after sign extension, viewed either as a 32-bit or as
/// a 64-bit quantity.
static bool isImmSExti32i8Value(uint64_t Value) {
  // Non-negative i8: [0, 0x7F].
  if (Value <= 0x000000000000007FULL)
    return true;

  // Negative i8 sign-extended to 32 bits: [0xFFFFFF80, 0xFFFFFFFF].
  if (Value >= 0x00000000FFFFFF80ULL && Value <= 0x00000000FFFFFFFFULL)
    return true;

  // Negative i8 sign-extended to 64 bits: everything from ...FF80 upwards.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
129
/// isImmZExtu32u8Value - Return true if \p Value fits in an unsigned 8-bit
/// immediate, i.e. lies in [0, 0xFF].
static bool isImmZExtu32u8Value(uint64_t Value) {
  const uint64_t MaxU8 = 0x00000000000000FFULL;
  return Value <= MaxU8;
}
133
/// isImmSExti64i8Value - Return true if \p Value is a signed 8-bit immediate
/// sign-extended to 64 bits.
static bool isImmSExti64i8Value(uint64_t Value) {
  // Non-negative i8: [0, 0x7F].
  if (Value <= 0x000000000000007FULL)
    return true;

  // Negative i8 sign-extended to 64 bits: everything from ...FF80 upwards.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
138
/// isImmSExti64i32Value - Return true if \p Value is a signed 32-bit immediate
/// sign-extended to 64 bits.
static bool isImmSExti64i32Value(uint64_t Value) {
  // Non-negative i32: [0, 0x7FFFFFFF].
  if (Value <= 0x000000007FFFFFFFULL)
    return true;

  // Negative i32 sign-extended to 64 bits: everything from ...80000000 upwards.
  return Value >= 0xFFFFFFFF80000000ULL;
}
143namespace {
144
145/// X86Operand - Instances of this class represent a parsed X86 machine
146/// instruction.
147struct X86Operand : public MCParsedAsmOperand {
148  enum KindTy {
149    Token,
150    Register,
151    Immediate,
152    Memory
153  } Kind;
154
155  SMLoc StartLoc, EndLoc;
156
157  union {
158    struct {
159      const char *Data;
160      unsigned Length;
161    } Tok;
162
163    struct {
164      unsigned RegNo;
165    } Reg;
166
167    struct {
168      const MCExpr *Val;
169    } Imm;
170
171    struct {
172      unsigned SegReg;
173      const MCExpr *Disp;
174      unsigned BaseReg;
175      unsigned IndexReg;
176      unsigned Scale;
177      unsigned Size;
178    } Mem;
179  };
180
181  X86Operand(KindTy K, SMLoc Start, SMLoc End)
182    : Kind(K), StartLoc(Start), EndLoc(End) {}
183
184  /// getStartLoc - Get the location of the first token of this operand.
185  SMLoc getStartLoc() const { return StartLoc; }
186  /// getEndLoc - Get the location of the last token of this operand.
187  SMLoc getEndLoc() const { return EndLoc; }
188
189  SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
190
191  virtual void print(raw_ostream &OS) const {}
192
193  StringRef getToken() const {
194    assert(Kind == Token && "Invalid access!");
195    return StringRef(Tok.Data, Tok.Length);
196  }
197  void setTokenValue(StringRef Value) {
198    assert(Kind == Token && "Invalid access!");
199    Tok.Data = Value.data();
200    Tok.Length = Value.size();
201  }
202
203  unsigned getReg() const {
204    assert(Kind == Register && "Invalid access!");
205    return Reg.RegNo;
206  }
207
208  const MCExpr *getImm() const {
209    assert(Kind == Immediate && "Invalid access!");
210    return Imm.Val;
211  }
212
213  const MCExpr *getMemDisp() const {
214    assert(Kind == Memory && "Invalid access!");
215    return Mem.Disp;
216  }
217  unsigned getMemSegReg() const {
218    assert(Kind == Memory && "Invalid access!");
219    return Mem.SegReg;
220  }
221  unsigned getMemBaseReg() const {
222    assert(Kind == Memory && "Invalid access!");
223    return Mem.BaseReg;
224  }
225  unsigned getMemIndexReg() const {
226    assert(Kind == Memory && "Invalid access!");
227    return Mem.IndexReg;
228  }
229  unsigned getMemScale() const {
230    assert(Kind == Memory && "Invalid access!");
231    return Mem.Scale;
232  }
233
234  bool isToken() const {return Kind == Token; }
235
236  bool isImm() const { return Kind == Immediate; }
237
238  bool isImmSExti16i8() const {
239    if (!isImm())
240      return false;
241
242    // If this isn't a constant expr, just assume it fits and let relaxation
243    // handle it.
244    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
245    if (!CE)
246      return true;
247
248    // Otherwise, check the value is in a range that makes sense for this
249    // extension.
250    return isImmSExti16i8Value(CE->getValue());
251  }
252  bool isImmSExti32i8() const {
253    if (!isImm())
254      return false;
255
256    // If this isn't a constant expr, just assume it fits and let relaxation
257    // handle it.
258    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
259    if (!CE)
260      return true;
261
262    // Otherwise, check the value is in a range that makes sense for this
263    // extension.
264    return isImmSExti32i8Value(CE->getValue());
265  }
266  bool isImmZExtu32u8() const {
267    if (!isImm())
268      return false;
269
270    // If this isn't a constant expr, just assume it fits and let relaxation
271    // handle it.
272    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
273    if (!CE)
274      return true;
275
276    // Otherwise, check the value is in a range that makes sense for this
277    // extension.
278    return isImmZExtu32u8Value(CE->getValue());
279  }
280  bool isImmSExti64i8() const {
281    if (!isImm())
282      return false;
283
284    // If this isn't a constant expr, just assume it fits and let relaxation
285    // handle it.
286    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
287    if (!CE)
288      return true;
289
290    // Otherwise, check the value is in a range that makes sense for this
291    // extension.
292    return isImmSExti64i8Value(CE->getValue());
293  }
294  bool isImmSExti64i32() const {
295    if (!isImm())
296      return false;
297
298    // If this isn't a constant expr, just assume it fits and let relaxation
299    // handle it.
300    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
301    if (!CE)
302      return true;
303
304    // Otherwise, check the value is in a range that makes sense for this
305    // extension.
306    return isImmSExti64i32Value(CE->getValue());
307  }
308
309  bool isMem() const { return Kind == Memory; }
310  bool isMem8() const {
311    return Kind == Memory && (!Mem.Size || Mem.Size == 8);
312  }
313  bool isMem16() const {
314    return Kind == Memory && (!Mem.Size || Mem.Size == 16);
315  }
316  bool isMem32() const {
317    return Kind == Memory && (!Mem.Size || Mem.Size == 32);
318  }
319  bool isMem64() const {
320    return Kind == Memory && (!Mem.Size || Mem.Size == 64);
321  }
322  bool isMem80() const {
323    return Kind == Memory && (!Mem.Size || Mem.Size == 80);
324  }
325  bool isMem128() const {
326    return Kind == Memory && (!Mem.Size || Mem.Size == 128);
327  }
328  bool isMem256() const {
329    return Kind == Memory && (!Mem.Size || Mem.Size == 256);
330  }
331
332  bool isAbsMem() const {
333    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
334      !getMemIndexReg() && getMemScale() == 1;
335  }
336
337  bool isReg() const { return Kind == Register; }
338
339  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
340    // Add as immediates when possible.
341    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
342      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
343    else
344      Inst.addOperand(MCOperand::CreateExpr(Expr));
345  }
346
347  void addRegOperands(MCInst &Inst, unsigned N) const {
348    assert(N == 1 && "Invalid number of operands!");
349    Inst.addOperand(MCOperand::CreateReg(getReg()));
350  }
351
352  void addImmOperands(MCInst &Inst, unsigned N) const {
353    assert(N == 1 && "Invalid number of operands!");
354    addExpr(Inst, getImm());
355  }
356
357  void addMem8Operands(MCInst &Inst, unsigned N) const {
358    addMemOperands(Inst, N);
359  }
360  void addMem16Operands(MCInst &Inst, unsigned N) const {
361    addMemOperands(Inst, N);
362  }
363  void addMem32Operands(MCInst &Inst, unsigned N) const {
364    addMemOperands(Inst, N);
365  }
366  void addMem64Operands(MCInst &Inst, unsigned N) const {
367    addMemOperands(Inst, N);
368  }
369  void addMem80Operands(MCInst &Inst, unsigned N) const {
370    addMemOperands(Inst, N);
371  }
372  void addMem128Operands(MCInst &Inst, unsigned N) const {
373    addMemOperands(Inst, N);
374  }
375  void addMem256Operands(MCInst &Inst, unsigned N) const {
376    addMemOperands(Inst, N);
377  }
378
379  void addMemOperands(MCInst &Inst, unsigned N) const {
380    assert((N == 5) && "Invalid number of operands!");
381    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
382    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
383    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
384    addExpr(Inst, getMemDisp());
385    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
386  }
387
388  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
389    assert((N == 1) && "Invalid number of operands!");
390    Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
391  }
392
393  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
394    SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
395    X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
396    Res->Tok.Data = Str.data();
397    Res->Tok.Length = Str.size();
398    return Res;
399  }
400
401  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
402    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
403    Res->Reg.RegNo = RegNo;
404    return Res;
405  }
406
407  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
408    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
409    Res->Imm.Val = Val;
410    return Res;
411  }
412
413  /// Create an absolute memory operand.
414  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
415                               SMLoc EndLoc, unsigned Size = 0) {
416    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
417    Res->Mem.SegReg   = 0;
418    Res->Mem.Disp     = Disp;
419    Res->Mem.BaseReg  = 0;
420    Res->Mem.IndexReg = 0;
421    Res->Mem.Scale    = 1;
422    Res->Mem.Size     = Size;
423    return Res;
424  }
425
426  /// Create a generalized memory operand.
427  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
428                               unsigned BaseReg, unsigned IndexReg,
429                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
430                               unsigned Size = 0) {
431    // We should never just have a displacement, that should be parsed as an
432    // absolute memory operand.
433    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
434
435    // The scale should always be one of {1,2,4,8}.
436    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
437           "Invalid scale!");
438    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
439    Res->Mem.SegReg   = SegReg;
440    Res->Mem.Disp     = Disp;
441    Res->Mem.BaseReg  = BaseReg;
442    Res->Mem.IndexReg = IndexReg;
443    Res->Mem.Scale    = Scale;
444    Res->Mem.Size     = Size;
445    return Res;
446  }
447};
448
449} // end anonymous namespace.
450
451bool X86AsmParser::isSrcOp(X86Operand &Op) {
452  unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
453
454  return (Op.isMem() &&
455    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
456    isa<MCConstantExpr>(Op.Mem.Disp) &&
457    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
458    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
459}
460
461bool X86AsmParser::isDstOp(X86Operand &Op) {
462  unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
463
464  return Op.isMem() && Op.Mem.SegReg == X86::ES &&
465    isa<MCConstantExpr>(Op.Mem.Disp) &&
466    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
467    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
468}
469
470bool X86AsmParser::ParseRegister(unsigned &RegNo,
471                                 SMLoc &StartLoc, SMLoc &EndLoc) {
472  RegNo = 0;
473  bool IntelSyntax = getParser().getAssemblerDialect();
474  if (!IntelSyntax) {
475    const AsmToken &TokPercent = Parser.getTok();
476    assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
477    StartLoc = TokPercent.getLoc();
478    Parser.Lex(); // Eat percent token.
479  }
480
481  const AsmToken &Tok = Parser.getTok();
482  if (Tok.isNot(AsmToken::Identifier)) {
483    if (IntelSyntax) return true;
484    return Error(StartLoc, "invalid register name",
485                 SMRange(StartLoc, Tok.getEndLoc()));
486  }
487
488  RegNo = MatchRegisterName(Tok.getString());
489
490  // If the match failed, try the register name as lowercase.
491  if (RegNo == 0)
492    RegNo = MatchRegisterName(Tok.getString().lower());
493
494  if (!is64BitMode()) {
495    // FIXME: This should be done using Requires<In32BitMode> and
496    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
497    // checked.
498    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
499    // REX prefix.
500    if (RegNo == X86::RIZ ||
501        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
502        X86II::isX86_64NonExtLowByteReg(RegNo) ||
503        X86II::isX86_64ExtendedReg(RegNo))
504      return Error(StartLoc, "register %"
505                   + Tok.getString() + " is only available in 64-bit mode",
506                   SMRange(StartLoc, Tok.getEndLoc()));
507  }
508
509  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
510  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
511    RegNo = X86::ST0;
512    EndLoc = Tok.getLoc();
513    Parser.Lex(); // Eat 'st'
514
515    // Check to see if we have '(4)' after %st.
516    if (getLexer().isNot(AsmToken::LParen))
517      return false;
518    // Lex the paren.
519    getParser().Lex();
520
521    const AsmToken &IntTok = Parser.getTok();
522    if (IntTok.isNot(AsmToken::Integer))
523      return Error(IntTok.getLoc(), "expected stack index");
524    switch (IntTok.getIntVal()) {
525    case 0: RegNo = X86::ST0; break;
526    case 1: RegNo = X86::ST1; break;
527    case 2: RegNo = X86::ST2; break;
528    case 3: RegNo = X86::ST3; break;
529    case 4: RegNo = X86::ST4; break;
530    case 5: RegNo = X86::ST5; break;
531    case 6: RegNo = X86::ST6; break;
532    case 7: RegNo = X86::ST7; break;
533    default: return Error(IntTok.getLoc(), "invalid stack index");
534    }
535
536    if (getParser().Lex().isNot(AsmToken::RParen))
537      return Error(Parser.getTok().getLoc(), "expected ')'");
538
539    EndLoc = Tok.getLoc();
540    Parser.Lex(); // Eat ')'
541    return false;
542  }
543
544  // If this is "db[0-7]", match it as an alias
545  // for dr[0-7].
546  if (RegNo == 0 && Tok.getString().size() == 3 &&
547      Tok.getString().startswith("db")) {
548    switch (Tok.getString()[2]) {
549    case '0': RegNo = X86::DR0; break;
550    case '1': RegNo = X86::DR1; break;
551    case '2': RegNo = X86::DR2; break;
552    case '3': RegNo = X86::DR3; break;
553    case '4': RegNo = X86::DR4; break;
554    case '5': RegNo = X86::DR5; break;
555    case '6': RegNo = X86::DR6; break;
556    case '7': RegNo = X86::DR7; break;
557    }
558
559    if (RegNo != 0) {
560      EndLoc = Tok.getLoc();
561      Parser.Lex(); // Eat it.
562      return false;
563    }
564  }
565
566  if (RegNo == 0) {
567    if (IntelSyntax) return true;
568    return Error(StartLoc, "invalid register name",
569                 SMRange(StartLoc, Tok.getEndLoc()));
570  }
571
572  EndLoc = Tok.getEndLoc();
573  Parser.Lex(); // Eat identifier token.
574  return false;
575}
576
577X86Operand *X86AsmParser::ParseOperand() {
578  if (getParser().getAssemblerDialect())
579    return ParseIntelOperand();
580  return ParseATTOperand();
581}
582
583/// getIntelMemOperandSize - Return intel memory operand size.
584static unsigned getIntelMemOperandSize(StringRef OpStr) {
585  unsigned Size = 0;
586  if (OpStr == "BYTE") Size = 8;
587  if (OpStr == "WORD") Size = 16;
588  if (OpStr == "DWORD") Size = 32;
589  if (OpStr == "QWORD") Size = 64;
590  if (OpStr == "XWORD") Size = 80;
591  if (OpStr == "XMMWORD") Size = 128;
592  if (OpStr == "YMMWORD") Size = 256;
593  return Size;
594}
595
596X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
597                                                   unsigned Size) {
598  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
599  SMLoc Start = Parser.getTok().getLoc(), End;
600
601  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
602  // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
603
604  // Eat '['
605  if (getLexer().isNot(AsmToken::LBrac))
606    return ErrorOperand(Start, "Expected '[' token!");
607  Parser.Lex();
608
609  if (getLexer().is(AsmToken::Identifier)) {
610    // Parse BaseReg
611    if (ParseRegister(BaseReg, Start, End)) {
612      // Handle '[' 'symbol' ']'
613      if (getParser().ParseExpression(Disp, End)) return 0;
614      if (getLexer().isNot(AsmToken::RBrac))
615        return ErrorOperand(Start, "Expected ']' token!");
616      Parser.Lex();
617      return X86Operand::CreateMem(Disp, Start, End, Size);
618    }
619  } else if (getLexer().is(AsmToken::Integer)) {
620      int64_t Val = Parser.getTok().getIntVal();
621      Parser.Lex();
622      SMLoc Loc = Parser.getTok().getLoc();
623      if (getLexer().is(AsmToken::RBrac)) {
624        // Handle '[' number ']'
625        Parser.Lex();
626        const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
627        if (SegReg)
628          return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
629                                       Start, End, Size);
630        return X86Operand::CreateMem(Disp, Start, End, Size);
631      } else if (getLexer().is(AsmToken::Star)) {
632        // Handle '[' Scale*IndexReg ']'
633        Parser.Lex();
634        SMLoc IdxRegLoc = Parser.getTok().getLoc();
635	if (ParseRegister(IndexReg, IdxRegLoc, End))
636	  return ErrorOperand(IdxRegLoc, "Expected register");
637        Scale = Val;
638      } else
639        return ErrorOperand(Loc, "Unepxeted token");
640  }
641
642  if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) {
643    bool isPlus = getLexer().is(AsmToken::Plus);
644    Parser.Lex();
645    SMLoc PlusLoc = Parser.getTok().getLoc();
646    if (getLexer().is(AsmToken::Integer)) {
647      int64_t Val = Parser.getTok().getIntVal();
648      Parser.Lex();
649      if (getLexer().is(AsmToken::Star)) {
650        Parser.Lex();
651        SMLoc IdxRegLoc = Parser.getTok().getLoc();
652	if (ParseRegister(IndexReg, IdxRegLoc, End))
653	  return ErrorOperand(IdxRegLoc, "Expected register");
654        Scale = Val;
655      } else if (getLexer().is(AsmToken::RBrac)) {
656        const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
657        Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
658      } else
659        return ErrorOperand(PlusLoc, "unexpected token after +");
660    } else if (getLexer().is(AsmToken::Identifier)) {
661      // This could be an index register or a displacement expression.
662      End = Parser.getTok().getLoc();
663      if (!IndexReg)
664        ParseRegister(IndexReg, Start, End);
665      else if (getParser().ParseExpression(Disp, End)) return 0;
666    }
667  }
668
669  if (getLexer().isNot(AsmToken::RBrac))
670    if (getParser().ParseExpression(Disp, End)) return 0;
671
672  End = Parser.getTok().getLoc();
673  if (getLexer().isNot(AsmToken::RBrac))
674    return ErrorOperand(End, "expected ']' token!");
675  Parser.Lex();
676  End = Parser.getTok().getLoc();
677
678  // handle [-42]
679  if (!BaseReg && !IndexReg)
680    return X86Operand::CreateMem(Disp, Start, End, Size);
681
682  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
683                               Start, End, Size);
684}
685
686/// ParseIntelMemOperand - Parse intel style memory operand.
687X86Operand *X86AsmParser::ParseIntelMemOperand() {
688  const AsmToken &Tok = Parser.getTok();
689  SMLoc Start = Parser.getTok().getLoc(), End;
690  unsigned SegReg = 0;
691
692  unsigned Size = getIntelMemOperandSize(Tok.getString());
693  if (Size) {
694    Parser.Lex();
695    assert (Tok.getString() == "PTR" && "Unexpected token!");
696    Parser.Lex();
697  }
698
699  if (getLexer().is(AsmToken::LBrac))
700    return ParseIntelBracExpression(SegReg, Size);
701
702  if (!ParseRegister(SegReg, Start, End)) {
703    // Handel SegReg : [ ... ]
704    if (getLexer().isNot(AsmToken::Colon))
705      return ErrorOperand(Start, "Expected ':' token!");
706    Parser.Lex(); // Eat :
707    if (getLexer().isNot(AsmToken::LBrac))
708      return ErrorOperand(Start, "Expected '[' token!");
709    return ParseIntelBracExpression(SegReg, Size);
710  }
711
712  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
713  if (getParser().ParseExpression(Disp, End)) return 0;
714  return X86Operand::CreateMem(Disp, Start, End, Size);
715}
716
717X86Operand *X86AsmParser::ParseIntelOperand() {
718  SMLoc Start = Parser.getTok().getLoc(), End;
719
720  // immediate.
721  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
722      getLexer().is(AsmToken::Minus)) {
723    const MCExpr *Val;
724    if (!getParser().ParseExpression(Val, End)) {
725      End = Parser.getTok().getLoc();
726      return X86Operand::CreateImm(Val, Start, End);
727    }
728  }
729
730  // register
731  unsigned RegNo = 0;
732  if (!ParseRegister(RegNo, Start, End)) {
733    End = Parser.getTok().getLoc();
734    return X86Operand::CreateReg(RegNo, Start, End);
735  }
736
737  // mem operand
738  return ParseIntelMemOperand();
739}
740
741X86Operand *X86AsmParser::ParseATTOperand() {
742  switch (getLexer().getKind()) {
743  default:
744    // Parse a memory operand with no segment register.
745    return ParseMemOperand(0, Parser.getTok().getLoc());
746  case AsmToken::Percent: {
747    // Read the register.
748    unsigned RegNo;
749    SMLoc Start, End;
750    if (ParseRegister(RegNo, Start, End)) return 0;
751    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
752      Error(Start, "%eiz and %riz can only be used as index registers",
753            SMRange(Start, End));
754      return 0;
755    }
756
757    // If this is a segment register followed by a ':', then this is the start
758    // of a memory reference, otherwise this is a normal register reference.
759    if (getLexer().isNot(AsmToken::Colon))
760      return X86Operand::CreateReg(RegNo, Start, End);
761
762
763    getParser().Lex(); // Eat the colon.
764    return ParseMemOperand(RegNo, Start);
765  }
766  case AsmToken::Dollar: {
767    // $42 -> immediate.
768    SMLoc Start = Parser.getTok().getLoc(), End;
769    Parser.Lex();
770    const MCExpr *Val;
771    if (getParser().ParseExpression(Val, End))
772      return 0;
773    return X86Operand::CreateImm(Val, Start, End);
774  }
775  }
776}
777
/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
/// has already been parsed if present.
///
/// \param SegReg   segment register parsed by the caller, or 0 for none.
/// \param MemStart location to use as the start of the operand.
/// \returns the new memory operand, or null (0) after an error.
X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {

  // We have to disambiguate a parenthesized expression "(4+5)" from the start
  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
  // only way to do this without lookahead is to eat the '(' and see what is
  // after it.
  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
  if (getLexer().isNot(AsmToken::LParen)) {
    // The operand starts with a displacement expression.
    SMLoc ExprEnd;
    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;

    // After parsing the base expression we could either have a parenthesized
    // memory address or not.  If not, return now.  If so, eat the (.
    if (getLexer().isNot(AsmToken::LParen)) {
      // Unless we have a segment register, treat this as an absolute memory
      // operand.
      if (SegReg == 0)
        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
    }

    // Eat the '('.
    Parser.Lex();
  } else {
    // Okay, we have a '('.  We don't know if this is an expression or not, so
    // we have to eat the ( to see beyond it.
    SMLoc LParenLoc = Parser.getTok().getLoc();
    Parser.Lex(); // Eat the '('.

    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
      // Nothing to do here, fall into the code below with the '(' part of the
      // memory operand consumed.
    } else {
      SMLoc ExprEnd;

      // It must be a parenthesized expression, parse it now.
      if (getParser().ParseParenExpression(Disp, ExprEnd))
        return 0;

      // After parsing the base expression we could either have a parenthesized
      // memory address or not.  If not, return now.  If so, eat the (.
      if (getLexer().isNot(AsmToken::LParen)) {
        // Unless we have a segment register, treat this as an absolute memory
        // operand. Note that it starts at the '(' rather than at MemStart.
        if (SegReg == 0)
          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
      }

      // Eat the '('.
      Parser.Lex();
    }
  }

  // If we reached here, then we just ate the ( of the memory operand.  Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;

  // Optional base register.
  if (getLexer().is(AsmToken::Percent)) {
    SMLoc StartLoc, EndLoc;
    if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
      Error(StartLoc, "eiz and riz can only be used as index registers",
            SMRange(StartLoc, EndLoc));
      return 0;
    }
  }

  if (getLexer().is(AsmToken::Comma)) {
    Parser.Lex(); // Eat the comma.

    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Note that even though it would be completely consistent to support syntax
    // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().is(AsmToken::Percent)) {
      // Index register; its source locations are not kept.
      SMLoc L;
      if (ParseRegister(IndexReg, L, L)) return 0;

      if (getLexer().isNot(AsmToken::RParen)) {
        // Parse the scale amount:
        //  ::= ',' [scale-expression]
        if (getLexer().isNot(AsmToken::Comma)) {
          Error(Parser.getTok().getLoc(),
                "expected comma in scale expression");
          return 0;
        }
        Parser.Lex(); // Eat the comma.

        // An empty scale after the comma leaves Scale at its default of 1.
        if (getLexer().isNot(AsmToken::RParen)) {
          SMLoc Loc = Parser.getTok().getLoc();

          int64_t ScaleVal;
          if (getParser().ParseAbsoluteExpression(ScaleVal))
            return 0;

          // Validate the scale amount.
          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
            return 0;
          }
          Scale = (unsigned)ScaleVal;
        }
      }
    } else if (getLexer().isNot(AsmToken::RParen)) {
      // A scale amount without an index register is parsed but ignored; a
      // warning is issued when it is anything other than 1.
      SMLoc Loc = Parser.getTok().getLoc();

      int64_t Value;
      if (getParser().ParseAbsoluteExpression(Value))
        return 0;

      if (Value != 1)
        Warning(Loc, "scale factor without index register is ignored");
      Scale = 1;
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (getLexer().isNot(AsmToken::RParen)) {
    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
    return 0;
  }
  SMLoc MemEnd = Parser.getTok().getLoc();
  Parser.Lex(); // Eat the ')'.

  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
                               MemStart, MemEnd);
}
910
/// ParseInstruction - Parse the operands that follow the mnemonic \p Name and
/// append the mnemonic token plus every parsed operand to \p Operands.
///
/// Along the way a number of alternative spellings are canonicalized: the
/// mnemonic may be patched (set<cc>b, cmp<cc>{ss,sd,ps,pd}), and several
/// operand forms of the I/O, string, shift and "int" instructions are
/// rewritten in place on \p Operands.  Operands removed by those rewrites are
/// deleted here.
///
/// \param Name     mnemonic exactly as written in the source.
/// \param NameLoc  location of the mnemonic; used for the mnemonic token and
///                 for the synthesized comparison-code immediate.
/// \param Operands output list; entry 0 is always the mnemonic token.
/// \returns true when an operand fails to parse; the result of Error() when
///          an unexpected token follows the operand list; false otherwise.
911bool X86AsmParser::
912ParseInstruction(StringRef Name, SMLoc NameLoc,
913                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
914  StringRef PatchedName = Name;
915
916  // FIXME: Hack to recognize setneb as setne.
  // Any "set...b" other than "setb"/"setnb" simply loses its trailing 'b'.
917  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
918      PatchedName != "setb" && PatchedName != "setnb")
919    PatchedName = PatchedName.substr(0, Name.size()-1);
920
921  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
  // When a known comparison code is embedded in the mnemonic, the mnemonic is
  // reduced to the plain [v]cmp{ss,sd,ps,pd} form and the code is carried in
  // ExtraImmOp instead.
922  const MCExpr *ExtraImmOp = 0;
923  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
924      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
925       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
926    bool IsVCMP = PatchedName.startswith("vcmp");
    // Index of the first character after the "cmp"/"vcmp" prefix.
927    unsigned SSECCIdx = IsVCMP ? 4 : 3;
    // Look up the text between the prefix and the two-character type suffix.
928    unsigned SSEComparisonCode = StringSwitch<unsigned>(
929      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
930      .Case("eq",          0)
931      .Case("lt",          1)
932      .Case("le",          2)
933      .Case("unord",       3)
934      .Case("neq",         4)
935      .Case("nlt",         5)
936      .Case("nle",         6)
937      .Case("ord",         7)
938      .Case("eq_uq",       8)
939      .Case("nge",         9)
940      .Case("ngt",      0x0A)
941      .Case("false",    0x0B)
942      .Case("neq_oq",   0x0C)
943      .Case("ge",       0x0D)
944      .Case("gt",       0x0E)
945      .Case("true",     0x0F)
946      .Case("eq_os",    0x10)
947      .Case("lt_oq",    0x11)
948      .Case("le_oq",    0x12)
949      .Case("unord_s",  0x13)
950      .Case("neq_us",   0x14)
951      .Case("nlt_uq",   0x15)
952      .Case("nle_uq",   0x16)
953      .Case("ord_s",    0x17)
954      .Case("eq_us",    0x18)
955      .Case("nge_uq",   0x19)
956      .Case("ngt_uq",   0x1A)
957      .Case("false_os", 0x1B)
958      .Case("neq_os",   0x1C)
959      .Case("ge_oq",    0x1D)
960      .Case("gt_oq",    0x1E)
961      .Case("true_us",  0x1F)
962      .Default(~0U);
    // ~0U means the text was not a comparison code; leave the name untouched.
963    if (SSEComparisonCode != ~0U) {
964      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
965                                          getParser().getContext());
966      if (PatchedName.endswith("ss")) {
967        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
968      } else if (PatchedName.endswith("sd")) {
969        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
970      } else if (PatchedName.endswith("ps")) {
971        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
972      } else {
973        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
974        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
975      }
976    }
977  }
978
  // The (possibly patched) mnemonic is always the first entry of Operands.
979  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
980
  // The comparison code, if any, goes right after the mnemonic and before the
  // operands parsed from the source; NameLoc serves as both of its locations.
981  if (ExtraImmOp)
982    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
983
984
985  // Determine whether this is an instruction prefix.
986  bool isPrefix =
987    Name == "lock" || Name == "rep" ||
988    Name == "repe" || Name == "repz" ||
989    Name == "repne" || Name == "repnz" ||
990    Name == "rex64" || Name == "data16";
991
992
993  // This does the actual operand parsing.  Don't parse any more if we have a
994  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
995  // just want to parse the "lock" as the first instruction and the "incl" as
996  // the next one.
997  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
998
999    // Parse '*' modifier.
1000    if (getLexer().is(AsmToken::Star)) {
1001      SMLoc Loc = Parser.getTok().getLoc();
1002      Operands.push_back(X86Operand::CreateToken("*", Loc));
1003      Parser.Lex(); // Eat the star.
1004    }
1005
1006    // Read the first operand.
1007    if (X86Operand *Op = ParseOperand())
1008      Operands.push_back(Op);
1009    else {
      // ParseOperand returned null: skip the rest of the statement and fail.
1010      Parser.EatToEndOfStatement();
1011      return true;
1012    }
1013
1014    while (getLexer().is(AsmToken::Comma)) {
1015      Parser.Lex();  // Eat the comma.
1016
1017      // Parse and remember the operand.
1018      if (X86Operand *Op = ParseOperand())
1019        Operands.push_back(Op);
1020      else {
1021        Parser.EatToEndOfStatement();
1022        return true;
1023      }
1024    }
1025
1026    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      // Capture the location before skipping ahead so the diagnostic points
      // at the offending token.
1027      SMLoc Loc = getLexer().getLoc();
1028      Parser.EatToEndOfStatement();
1029      return Error(Loc, "unexpected token in argument list");
1030    }
1031  }
1032
1033  if (getLexer().is(AsmToken::EndOfStatement))
1034    Parser.Lex(); // Consume the EndOfStatement
1035  else if (isPrefix && getLexer().is(AsmToken::Slash))
1036    Parser.Lex(); // Consume the prefix separator Slash
1037
  // Everything below rewrites the already-collected Operands list; no further
  // tokens are consumed.  The checks use the original Name, not PatchedName.

1038  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1039  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
1040  // documented form in various unofficial manuals, so a lot of code uses it.
1041  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1042      Operands.size() == 3) {
1043    X86Operand &Op = *(X86Operand*)Operands.back();
    // Only a bare "(%dx)" qualifies: no segment, zero constant displacement,
    // base register dx and no index register.
1044    if (Op.isMem() && Op.Mem.SegReg == 0 &&
1045        isa<MCConstantExpr>(Op.Mem.Disp) &&
1046        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1047        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1048      SMLoc Loc = Op.getEndLoc();
      // Swap the memory operand for a register operand, then free the old one.
1049      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1050      delete &Op;
1051    }
1052  }
1053  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1054  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1055      Operands.size() == 3) {
    // Here the memory form is the first operand after the mnemonic.
1056    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1057    if (Op.isMem() && Op.Mem.SegReg == 0 &&
1058        isa<MCConstantExpr>(Op.Mem.Disp) &&
1059        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1060        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1061      SMLoc Loc = Op.getEndLoc();
1062      Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1063      delete &Op;
1064    }
1065  }
1066  // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
1067  if (Name.startswith("ins") && Operands.size() == 3 &&
1068      (Name == "insb" || Name == "insw" || Name == "insl")) {
1069    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1070    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1071    if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
      // Drop both explicit operands so that only the mnemonic remains.
1072      Operands.pop_back();
1073      Operands.pop_back();
1074      delete &Op;
1075      delete &Op2;
1076    }
1077  }
1078
1079  // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
1080  if (Name.startswith("outs") && Operands.size() == 3 &&
1081      (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1082    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1083    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1084    if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
      // Only the two operands are removed; the mnemonic token is unchanged.
1085      Operands.pop_back();
1086      Operands.pop_back();
1087      delete &Op;
1088      delete &Op2;
1089    }
1090  }
1091
1092  // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
  // "movsq" is only considered in 64-bit mode.
1093  if (Name.startswith("movs") && Operands.size() == 3 &&
1094      (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1095       (is64BitMode() && Name == "movsq"))) {
1096    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1097    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1098    if (isSrcOp(Op) && isDstOp(Op2)) {
1099      Operands.pop_back();
1100      Operands.pop_back();
1101      delete &Op;
1102      delete &Op2;
1103    }
1104  }
1105  // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
1106  if (Name.startswith("lods") && Operands.size() == 3 &&
1107      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1108       Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1109    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1110    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1111    if (isSrcOp(*Op1) && Op2->isReg()) {
      // Choose the sized mnemonic from the destination register.  A bare
      // "lods" accepts any of the four registers; a suffixed mnemonic only
      // accepts the register matching its suffix.
1112      const char *ins;
1113      unsigned reg = Op2->getReg();
1114      bool isLods = Name == "lods";
1115      if (reg == X86::AL && (isLods || Name == "lodsb"))
1116        ins = "lodsb";
1117      else if (reg == X86::AX && (isLods || Name == "lodsw"))
1118        ins = "lodsw";
1119      else if (reg == X86::EAX && (isLods || Name == "lodsl"))
1120        ins = "lodsl";
1121      else if (reg == X86::RAX && (isLods || Name == "lodsq"))
1122        ins = "lodsq";
1123      else
1124        ins = NULL;
      // A null ins means register and mnemonic disagree; the operands are then
      // left as parsed.
1125      if (ins != NULL) {
1126        Operands.pop_back();
1127        Operands.pop_back();
1128        delete Op1;
1129        delete Op2;
        // Only rewrite the mnemonic token when it actually changes.
1130        if (Name != ins)
1131          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1132      }
1133    }
1134  }
1135  // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
1136  if (Name.startswith("stos") && Operands.size() == 3 &&
1137      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
1138       Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
1139    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1140    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1141    if (isDstOp(*Op2) && Op1->isReg()) {
      // Mirror of the lods case, with the register as the first operand.
1142      const char *ins;
1143      unsigned reg = Op1->getReg();
1144      bool isStos = Name == "stos";
1145      if (reg == X86::AL && (isStos || Name == "stosb"))
1146        ins = "stosb";
1147      else if (reg == X86::AX && (isStos || Name == "stosw"))
1148        ins = "stosw";
1149      else if (reg == X86::EAX && (isStos || Name == "stosl"))
1150        ins = "stosl";
1151      else if (reg == X86::RAX && (isStos || Name == "stosq"))
1152        ins = "stosq";
1153      else
1154        ins = NULL;
1155      if (ins != NULL) {
1156        Operands.pop_back();
1157        Operands.pop_back();
1158        delete Op1;
1159        delete Op2;
1160        if (Name != ins)
1161          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1162      }
1163    }
1164  }
1165
1166  // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
1167  // "shift <op>".
  // The same treatment is applied to the sal, rcl, rcr, rol and ror families.
1168  if ((Name.startswith("shr") || Name.startswith("sar") ||
1169       Name.startswith("shl") || Name.startswith("sal") ||
1170       Name.startswith("rcl") || Name.startswith("rcr") ||
1171       Name.startswith("rol") || Name.startswith("ror")) &&
1172      Operands.size() == 3) {
    // A non-zero assembler dialect selects the Intel path, where the count is
    // the last operand; otherwise it is the first operand after the mnemonic.
1173    if (getParser().getAssemblerDialect()) {
1174      // Intel syntax
1175      X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
1176      if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1177	  cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1178	delete Operands[2];
1179	Operands.pop_back();
1180      }
1181    } else {
1182      X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1183      if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1184	  cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1185	delete Operands[1];
1186	Operands.erase(Operands.begin() + 1);
1187      }
1188    }
1189  }
1190
1191  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
1192  // instalias with an immediate operand yet.
1193  if (Name == "int" && Operands.size() == 2) {
1194    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1195    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1196        cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
      // Remove the immediate and rename the mnemonic token.
1197      delete Operands[1];
1198      Operands.erase(Operands.begin() + 1);
1199      static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
1200    }
1201  }
1202
1203  return false;
1204}
1205
1206bool X86AsmParser::
1207processInstruction(MCInst &Inst,
1208                   const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
1209  switch (Inst.getOpcode()) {
1210  default: return false;
1211  case X86::AND16i16: {
1212    if (!Inst.getOperand(0).isImm() ||
1213        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1214      return false;
1215
1216    MCInst TmpInst;
1217    TmpInst.setOpcode(X86::AND16ri8);
1218    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1219    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1220    TmpInst.addOperand(Inst.getOperand(0));
1221    Inst = TmpInst;
1222    return true;
1223  }
1224  case X86::AND32i32: {
1225    if (!Inst.getOperand(0).isImm() ||
1226        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1227      return false;
1228
1229    MCInst TmpInst;
1230    TmpInst.setOpcode(X86::AND32ri8);
1231    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1232    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1233    TmpInst.addOperand(Inst.getOperand(0));
1234    Inst = TmpInst;
1235    return true;
1236  }
1237  case X86::AND64i32: {
1238    if (!Inst.getOperand(0).isImm() ||
1239        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1240      return false;
1241
1242    MCInst TmpInst;
1243    TmpInst.setOpcode(X86::AND64ri8);
1244    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1245    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1246    TmpInst.addOperand(Inst.getOperand(0));
1247    Inst = TmpInst;
1248    return true;
1249  }
1250  case X86::XOR16i16: {
1251    if (!Inst.getOperand(0).isImm() ||
1252        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1253      return false;
1254
1255    MCInst TmpInst;
1256    TmpInst.setOpcode(X86::XOR16ri8);
1257    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1258    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1259    TmpInst.addOperand(Inst.getOperand(0));
1260    Inst = TmpInst;
1261    return true;
1262  }
1263  case X86::XOR32i32: {
1264    if (!Inst.getOperand(0).isImm() ||
1265        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1266      return false;
1267
1268    MCInst TmpInst;
1269    TmpInst.setOpcode(X86::XOR32ri8);
1270    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1271    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1272    TmpInst.addOperand(Inst.getOperand(0));
1273    Inst = TmpInst;
1274    return true;
1275  }
1276  case X86::XOR64i32: {
1277    if (!Inst.getOperand(0).isImm() ||
1278        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1279      return false;
1280
1281    MCInst TmpInst;
1282    TmpInst.setOpcode(X86::XOR64ri8);
1283    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1284    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1285    TmpInst.addOperand(Inst.getOperand(0));
1286    Inst = TmpInst;
1287    return true;
1288  }
1289  case X86::OR16i16: {
1290    if (!Inst.getOperand(0).isImm() ||
1291        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1292      return false;
1293
1294    MCInst TmpInst;
1295    TmpInst.setOpcode(X86::OR16ri8);
1296    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1297    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1298    TmpInst.addOperand(Inst.getOperand(0));
1299    Inst = TmpInst;
1300    return true;
1301  }
1302  case X86::OR32i32: {
1303    if (!Inst.getOperand(0).isImm() ||
1304        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1305      return false;
1306
1307    MCInst TmpInst;
1308    TmpInst.setOpcode(X86::OR32ri8);
1309    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1310    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1311    TmpInst.addOperand(Inst.getOperand(0));
1312    Inst = TmpInst;
1313    return true;
1314  }
1315  case X86::OR64i32: {
1316    if (!Inst.getOperand(0).isImm() ||
1317        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1318      return false;
1319
1320    MCInst TmpInst;
1321    TmpInst.setOpcode(X86::OR64ri8);
1322    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1323    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1324    TmpInst.addOperand(Inst.getOperand(0));
1325    Inst = TmpInst;
1326    return true;
1327  }
1328  case X86::CMP16i16: {
1329    if (!Inst.getOperand(0).isImm() ||
1330        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1331      return false;
1332
1333    MCInst TmpInst;
1334    TmpInst.setOpcode(X86::CMP16ri8);
1335    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1336    TmpInst.addOperand(Inst.getOperand(0));
1337    Inst = TmpInst;
1338    return true;
1339  }
1340  case X86::CMP32i32: {
1341    if (!Inst.getOperand(0).isImm() ||
1342        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1343      return false;
1344
1345    MCInst TmpInst;
1346    TmpInst.setOpcode(X86::CMP32ri8);
1347    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1348    TmpInst.addOperand(Inst.getOperand(0));
1349    Inst = TmpInst;
1350    return true;
1351  }
1352  case X86::CMP64i32: {
1353    if (!Inst.getOperand(0).isImm() ||
1354        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1355      return false;
1356
1357    MCInst TmpInst;
1358    TmpInst.setOpcode(X86::CMP64ri8);
1359    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1360    TmpInst.addOperand(Inst.getOperand(0));
1361    Inst = TmpInst;
1362    return true;
1363  }
1364  case X86::ADD16i16: {
1365    if (!Inst.getOperand(0).isImm() ||
1366        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1367      return false;
1368
1369    MCInst TmpInst;
1370    TmpInst.setOpcode(X86::ADD16ri8);
1371    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1372    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1373    TmpInst.addOperand(Inst.getOperand(0));
1374    Inst = TmpInst;
1375    return true;
1376  }
1377  case X86::ADD32i32: {
1378    if (!Inst.getOperand(0).isImm() ||
1379        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1380      return false;
1381
1382    MCInst TmpInst;
1383    TmpInst.setOpcode(X86::ADD32ri8);
1384    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1385    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1386    TmpInst.addOperand(Inst.getOperand(0));
1387    Inst = TmpInst;
1388    return true;
1389  }
1390  case X86::ADD64i32: {
1391    if (!Inst.getOperand(0).isImm() ||
1392        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1393      return false;
1394
1395    MCInst TmpInst;
1396    TmpInst.setOpcode(X86::ADD64ri8);
1397    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1398    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1399    TmpInst.addOperand(Inst.getOperand(0));
1400    Inst = TmpInst;
1401    return true;
1402  }
1403  case X86::SUB16i16: {
1404    if (!Inst.getOperand(0).isImm() ||
1405        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1406      return false;
1407
1408    MCInst TmpInst;
1409    TmpInst.setOpcode(X86::SUB16ri8);
1410    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1411    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1412    TmpInst.addOperand(Inst.getOperand(0));
1413    Inst = TmpInst;
1414    return true;
1415  }
1416  case X86::SUB32i32: {
1417    if (!Inst.getOperand(0).isImm() ||
1418        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1419      return false;
1420
1421    MCInst TmpInst;
1422    TmpInst.setOpcode(X86::SUB32ri8);
1423    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1424    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1425    TmpInst.addOperand(Inst.getOperand(0));
1426    Inst = TmpInst;
1427    return true;
1428  }
1429  case X86::SUB64i32: {
1430    if (!Inst.getOperand(0).isImm() ||
1431        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1432      return false;
1433
1434    MCInst TmpInst;
1435    TmpInst.setOpcode(X86::SUB64ri8);
1436    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1437    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1438    TmpInst.addOperand(Inst.getOperand(0));
1439    Inst = TmpInst;
1440    return true;
1441  }
1442  }
1443  return false;
1444}
1445
1446bool X86AsmParser::
1447MatchAndEmitInstruction(SMLoc IDLoc,
1448                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1449                        MCStreamer &Out) {
1450  assert(!Operands.empty() && "Unexpect empty operand list!");
1451  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
1452  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
1453
1454  // First, handle aliases that expand to multiple instructions.
1455  // FIXME: This should be replaced with a real .td file alias mechanism.
1456  // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
1457  // call.
1458  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
1459      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
1460      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
1461      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
1462    MCInst Inst;
1463    Inst.setOpcode(X86::WAIT);
1464    Inst.setLoc(IDLoc);
1465    Out.EmitInstruction(Inst);
1466
1467    const char *Repl =
1468      StringSwitch<const char*>(Op->getToken())
1469        .Case("finit",  "fninit")
1470        .Case("fsave",  "fnsave")
1471        .Case("fstcw",  "fnstcw")
1472        .Case("fstcww",  "fnstcw")
1473        .Case("fstenv", "fnstenv")
1474        .Case("fstsw",  "fnstsw")
1475        .Case("fstsww", "fnstsw")
1476        .Case("fclex",  "fnclex")
1477        .Default(0);
1478    assert(Repl && "Unknown wait-prefixed instruction");
1479    delete Operands[0];
1480    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
1481  }
1482
1483  bool WasOriginallyInvalidOperand = false;
1484  unsigned OrigErrorInfo;
1485  MCInst Inst;
1486
1487  // First, try a direct match.
1488  switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo,
1489                               getParser().getAssemblerDialect())) {
1490  default: break;
1491  case Match_Success:
1492    // Some instructions need post-processing to, for example, tweak which
1493    // encoding is selected. Loop on it while changes happen so the
1494    // individual transformations can chain off each other.
1495    while (processInstruction(Inst, Operands))
1496      ;
1497
1498    Inst.setLoc(IDLoc);
1499    Out.EmitInstruction(Inst);
1500    return false;
1501  case Match_MissingFeature:
1502    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1503    return true;
1504  case Match_ConversionFail:
1505    return Error(IDLoc, "unable to convert operands to instruction");
1506  case Match_InvalidOperand:
1507    WasOriginallyInvalidOperand = true;
1508    break;
1509  case Match_MnemonicFail:
1510    break;
1511  }
1512
1513  // FIXME: Ideally, we would only attempt suffix matches for things which are
1514  // valid prefixes, and we could just infer the right unambiguous
1515  // type. However, that requires substantially more matcher support than the
1516  // following hack.
1517
1518  // Change the operand to point to a temporary token.
1519  StringRef Base = Op->getToken();
1520  SmallString<16> Tmp;
1521  Tmp += Base;
1522  Tmp += ' ';
1523  Op->setTokenValue(Tmp.str());
1524
1525  // If this instruction starts with an 'f', then it is a floating point stack
1526  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
1527  // 80-bit floating point, which use the suffixes s,l,t respectively.
1528  //
1529  // Otherwise, we assume that this may be an integer instruction, which comes
1530  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
1531  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
1532
1533  // Check for the various suffix matches.
1534  Tmp[Base.size()] = Suffixes[0];
1535  unsigned ErrorInfoIgnore;
1536  unsigned Match1, Match2, Match3, Match4;
1537
1538  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
1539  Tmp[Base.size()] = Suffixes[1];
1540  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
1541  Tmp[Base.size()] = Suffixes[2];
1542  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
1543  Tmp[Base.size()] = Suffixes[3];
1544  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
1545
1546  // Restore the old token.
1547  Op->setTokenValue(Base);
1548
1549  // If exactly one matched, then we treat that as a successful match (and the
1550  // instruction will already have been filled in correctly, since the failing
1551  // matches won't have modified it).
1552  unsigned NumSuccessfulMatches =
1553    (Match1 == Match_Success) + (Match2 == Match_Success) +
1554    (Match3 == Match_Success) + (Match4 == Match_Success);
1555  if (NumSuccessfulMatches == 1) {
1556    Inst.setLoc(IDLoc);
1557    Out.EmitInstruction(Inst);
1558    return false;
1559  }
1560
1561  // Otherwise, the match failed, try to produce a decent error message.
1562
1563  // If we had multiple suffix matches, then identify this as an ambiguous
1564  // match.
1565  if (NumSuccessfulMatches > 1) {
1566    char MatchChars[4];
1567    unsigned NumMatches = 0;
1568    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
1569    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
1570    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
1571    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
1572
1573    SmallString<126> Msg;
1574    raw_svector_ostream OS(Msg);
1575    OS << "ambiguous instructions require an explicit suffix (could be ";
1576    for (unsigned i = 0; i != NumMatches; ++i) {
1577      if (i != 0)
1578        OS << ", ";
1579      if (i + 1 == NumMatches)
1580        OS << "or ";
1581      OS << "'" << Base << MatchChars[i] << "'";
1582    }
1583    OS << ")";
1584    Error(IDLoc, OS.str());
1585    return true;
1586  }
1587
1588  // Okay, we know that none of the variants matched successfully.
1589
1590  // If all of the instructions reported an invalid mnemonic, then the original
1591  // mnemonic was invalid.
1592  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
1593      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
1594    if (!WasOriginallyInvalidOperand) {
1595      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
1596                   Op->getLocRange());
1597    }
1598
1599    // Recover location info for the operand if we know which was the problem.
1600    if (OrigErrorInfo != ~0U) {
1601      if (OrigErrorInfo >= Operands.size())
1602        return Error(IDLoc, "too few operands for instruction");
1603
1604      X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
1605      if (Operand->getStartLoc().isValid()) {
1606        SMRange OperandRange = Operand->getLocRange();
1607        return Error(Operand->getStartLoc(), "invalid operand for instruction",
1608                     OperandRange);
1609      }
1610    }
1611
1612    return Error(IDLoc, "invalid operand for instruction");
1613  }
1614
1615  // If one instruction matched with a missing feature, report this as a
1616  // missing feature.
1617  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
1618      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
1619    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
1620    return true;
1621  }
1622
1623  // If one instruction matched with an invalid operand, report this as an
1624  // operand failure.
1625  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
1626      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
1627    Error(IDLoc, "invalid operand for instruction");
1628    return true;
1629  }
1630
1631  // If all of these were an outright failure, report it in a useless way.
1632  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
1633  return true;
1634}
1635
1636
1637bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
1638  StringRef IDVal = DirectiveID.getIdentifier();
1639  if (IDVal == ".word")
1640    return ParseDirectiveWord(2, DirectiveID.getLoc());
1641  else if (IDVal.startswith(".code"))
1642    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
1643  return true;
1644}
1645
1646/// ParseDirectiveWord
1647///  ::= .word [ expression (, expression)* ]
1648bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
1649  if (getLexer().isNot(AsmToken::EndOfStatement)) {
1650    for (;;) {
1651      const MCExpr *Value;
1652      if (getParser().ParseExpression(Value))
1653        return true;
1654
1655      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
1656
1657      if (getLexer().is(AsmToken::EndOfStatement))
1658        break;
1659
1660      // FIXME: Improve diagnostic.
1661      if (getLexer().isNot(AsmToken::Comma))
1662        return Error(L, "unexpected token in directive");
1663      Parser.Lex();
1664    }
1665  }
1666
1667  Parser.Lex();
1668  return false;
1669}
1670
1671/// ParseDirectiveCode
1672///  ::= .code32 | .code64
1673bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
1674  if (IDVal == ".code32") {
1675    Parser.Lex();
1676    if (is64BitMode()) {
1677      SwitchMode();
1678      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
1679    }
1680  } else if (IDVal == ".code64") {
1681    Parser.Lex();
1682    if (!is64BitMode()) {
1683      SwitchMode();
1684      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
1685    }
1686  } else {
1687    return Error(L, "unexpected directive " + IDVal);
1688  }
1689
1690  return false;
1691}
1692
1693
1694extern "C" void LLVMInitializeX86AsmLexer();
1695
1696// Force static initialization.
1697extern "C" void LLVMInitializeX86AsmParser() {
1698  RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
1699  RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
1700  LLVMInitializeX86AsmLexer();
1701}
1702
1703#define GET_REGISTER_MATCHER
1704#define GET_MATCHER_IMPLEMENTATION
1705#include "X86GenAsmMatcher.inc"
1706