1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "MCTargetDesc/X86BaseInfo.h"
11#include "X86AsmInstrumentation.h"
12#include "X86AsmParserCommon.h"
13#include "X86Operand.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SmallString.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/MC/MCContext.h"
21#include "llvm/MC/MCExpr.h"
22#include "llvm/MC/MCInst.h"
23#include "llvm/MC/MCInstrInfo.h"
24#include "llvm/MC/MCParser/MCAsmLexer.h"
25#include "llvm/MC/MCParser/MCAsmParser.h"
26#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27#include "llvm/MC/MCRegisterInfo.h"
28#include "llvm/MC/MCStreamer.h"
29#include "llvm/MC/MCSubtargetInfo.h"
30#include "llvm/MC/MCSymbol.h"
31#include "llvm/MC/MCTargetAsmParser.h"
32#include "llvm/Support/SourceMgr.h"
33#include "llvm/Support/TargetRegistry.h"
34#include "llvm/Support/raw_ostream.h"
35#include <memory>
36
37using namespace llvm;
38
39namespace {
40
// Precedence of each InfixCalculatorTok, indexed by the token's enumerator
// value.  InfixCalculator::pushOperator() compares these entries to decide
// whether an incoming operator is pushed directly or whether operators already
// on the stack are first moved to the postfix sequence.  The entries must stay
// in the same order as the InfixCalculatorTok enumerators.
static const char OpPrecedence[] = {
  0, // IC_OR
  1, // IC_AND
  2, // IC_LSHIFT
  2, // IC_RSHIFT
  3, // IC_PLUS
  3, // IC_MINUS
  4, // IC_MULTIPLY
  4, // IC_DIVIDE
  5, // IC_RPAREN
  6, // IC_LPAREN
  0, // IC_IMM
  0  // IC_REGISTER
};
55
56class X86AsmParser : public MCTargetAsmParser {
57  MCSubtargetInfo &STI;
58  MCAsmParser &Parser;
59  const MCInstrInfo &MII;
60  ParseInstructionInfo *InstInfo;
61  std::unique_ptr<X86AsmInstrumentation> Instrumentation;
62private:
63  SMLoc consumeToken() {
64    SMLoc Result = Parser.getTok().getLoc();
65    Parser.Lex();
66    return Result;
67  }
68
  /// Tokens understood by InfixCalculator.  The enumerator values are used as
  /// indices into OpPrecedence, so the order here must match that table.
  /// IC_IMM and IC_REGISTER are operands; everything else is an operator or a
  /// parenthesis.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER
  };
83
84  class InfixCalculator {
85    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87    SmallVector<ICToken, 4> PostfixStack;
88
89  public:
90    int64_t popOperand() {
91      assert (!PostfixStack.empty() && "Poped an empty stack!");
92      ICToken Op = PostfixStack.pop_back_val();
93      assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94              && "Expected and immediate or register!");
95      return Op.second;
96    }
97    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99              "Unexpected operand!");
100      PostfixStack.push_back(std::make_pair(Op, Val));
101    }
102
103    void popOperator() { InfixOperatorStack.pop_back(); }
104    void pushOperator(InfixCalculatorTok Op) {
105      // Push the new operator if the stack is empty.
106      if (InfixOperatorStack.empty()) {
107        InfixOperatorStack.push_back(Op);
108        return;
109      }
110
111      // Push the new operator if it has a higher precedence than the operator
112      // on the top of the stack or the operator on the top of the stack is a
113      // left parentheses.
114      unsigned Idx = InfixOperatorStack.size() - 1;
115      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117        InfixOperatorStack.push_back(Op);
118        return;
119      }
120
121      // The operator on the top of the stack has higher precedence than the
122      // new operator.
123      unsigned ParenCount = 0;
124      while (1) {
125        // Nothing to process.
126        if (InfixOperatorStack.empty())
127          break;
128
129        Idx = InfixOperatorStack.size() - 1;
130        StackOp = InfixOperatorStack[Idx];
131        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
132          break;
133
134        // If we have an even parentheses count and we see a left parentheses,
135        // then stop processing.
136        if (!ParenCount && StackOp == IC_LPAREN)
137          break;
138
139        if (StackOp == IC_RPAREN) {
140          ++ParenCount;
141          InfixOperatorStack.pop_back();
142        } else if (StackOp == IC_LPAREN) {
143          --ParenCount;
144          InfixOperatorStack.pop_back();
145        } else {
146          InfixOperatorStack.pop_back();
147          PostfixStack.push_back(std::make_pair(StackOp, 0));
148        }
149      }
150      // Push the new operator.
151      InfixOperatorStack.push_back(Op);
152    }
153    int64_t execute() {
154      // Push any remaining operators onto the postfix stack.
155      while (!InfixOperatorStack.empty()) {
156        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
157        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158          PostfixStack.push_back(std::make_pair(StackOp, 0));
159      }
160
161      if (PostfixStack.empty())
162        return 0;
163
164      SmallVector<ICToken, 16> OperandStack;
165      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166        ICToken Op = PostfixStack[i];
167        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168          OperandStack.push_back(Op);
169        } else {
170          assert (OperandStack.size() > 1 && "Too few operands.");
171          int64_t Val;
172          ICToken Op2 = OperandStack.pop_back_val();
173          ICToken Op1 = OperandStack.pop_back_val();
174          switch (Op.first) {
175          default:
176            report_fatal_error("Unexpected operator!");
177            break;
178          case IC_PLUS:
179            Val = Op1.second + Op2.second;
180            OperandStack.push_back(std::make_pair(IC_IMM, Val));
181            break;
182          case IC_MINUS:
183            Val = Op1.second - Op2.second;
184            OperandStack.push_back(std::make_pair(IC_IMM, Val));
185            break;
186          case IC_MULTIPLY:
187            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188                    "Multiply operation with an immediate and a register!");
189            Val = Op1.second * Op2.second;
190            OperandStack.push_back(std::make_pair(IC_IMM, Val));
191            break;
192          case IC_DIVIDE:
193            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194                    "Divide operation with an immediate and a register!");
195            assert (Op2.second != 0 && "Division by zero!");
196            Val = Op1.second / Op2.second;
197            OperandStack.push_back(std::make_pair(IC_IMM, Val));
198            break;
199          case IC_OR:
200            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201                    "Or operation with an immediate and a register!");
202            Val = Op1.second | Op2.second;
203            OperandStack.push_back(std::make_pair(IC_IMM, Val));
204            break;
205          case IC_AND:
206            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207                    "And operation with an immediate and a register!");
208            Val = Op1.second & Op2.second;
209            OperandStack.push_back(std::make_pair(IC_IMM, Val));
210            break;
211          case IC_LSHIFT:
212            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213                    "Left shift operation with an immediate and a register!");
214            Val = Op1.second << Op2.second;
215            OperandStack.push_back(std::make_pair(IC_IMM, Val));
216            break;
217          case IC_RSHIFT:
218            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219                    "Right shift operation with an immediate and a register!");
220            Val = Op1.second >> Op2.second;
221            OperandStack.push_back(std::make_pair(IC_IMM, Val));
222            break;
223          }
224        }
225      }
226      assert (OperandStack.size() == 1 && "Expected a single result.");
227      return OperandStack.pop_back_val().second;
228    }
229  };
230
  /// States of IntelExprStateMachine.  Each state names the kind of token
  /// most recently accepted; IES_ERROR is entered when a token arrives in a
  /// state that does not allow it.
  enum IntelExprState {
    IES_OR,
    IES_AND,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };
250
251  class IntelExprStateMachine {
252    IntelExprState State, PrevState;
253    unsigned BaseReg, IndexReg, TmpReg, Scale;
254    int64_t Imm;
255    const MCExpr *Sym;
256    StringRef SymName;
257    bool StopOnLBrac, AddImmPrefix;
258    InfixCalculator IC;
259    InlineAsmIdentifierInfo Info;
260  public:
261    IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
262      State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
263      Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
264      AddImmPrefix(addimmprefix) { Info.clear(); }
265
266    unsigned getBaseReg() { return BaseReg; }
267    unsigned getIndexReg() { return IndexReg; }
268    unsigned getScale() { return Scale; }
269    const MCExpr *getSym() { return Sym; }
270    StringRef getSymName() { return SymName; }
271    int64_t getImm() { return Imm + IC.execute(); }
272    bool isValidEndState() {
273      return State == IES_RBRAC || State == IES_INTEGER;
274    }
275    bool getStopOnLBrac() { return StopOnLBrac; }
276    bool getAddImmPrefix() { return AddImmPrefix; }
277    bool hadError() { return State == IES_ERROR; }
278
279    InlineAsmIdentifierInfo &getIdentifierInfo() {
280      return Info;
281    }
282
283    void onOr() {
284      IntelExprState CurrState = State;
285      switch (State) {
286      default:
287        State = IES_ERROR;
288        break;
289      case IES_INTEGER:
290      case IES_RPAREN:
291      case IES_REGISTER:
292        State = IES_OR;
293        IC.pushOperator(IC_OR);
294        break;
295      }
296      PrevState = CurrState;
297    }
298    void onAnd() {
299      IntelExprState CurrState = State;
300      switch (State) {
301      default:
302        State = IES_ERROR;
303        break;
304      case IES_INTEGER:
305      case IES_RPAREN:
306      case IES_REGISTER:
307        State = IES_AND;
308        IC.pushOperator(IC_AND);
309        break;
310      }
311      PrevState = CurrState;
312    }
313    void onLShift() {
314      IntelExprState CurrState = State;
315      switch (State) {
316      default:
317        State = IES_ERROR;
318        break;
319      case IES_INTEGER:
320      case IES_RPAREN:
321      case IES_REGISTER:
322        State = IES_LSHIFT;
323        IC.pushOperator(IC_LSHIFT);
324        break;
325      }
326      PrevState = CurrState;
327    }
328    void onRShift() {
329      IntelExprState CurrState = State;
330      switch (State) {
331      default:
332        State = IES_ERROR;
333        break;
334      case IES_INTEGER:
335      case IES_RPAREN:
336      case IES_REGISTER:
337        State = IES_RSHIFT;
338        IC.pushOperator(IC_RSHIFT);
339        break;
340      }
341      PrevState = CurrState;
342    }
343    void onPlus() {
344      IntelExprState CurrState = State;
345      switch (State) {
346      default:
347        State = IES_ERROR;
348        break;
349      case IES_INTEGER:
350      case IES_RPAREN:
351      case IES_REGISTER:
352        State = IES_PLUS;
353        IC.pushOperator(IC_PLUS);
354        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
355          // If we already have a BaseReg, then assume this is the IndexReg with
356          // a scale of 1.
357          if (!BaseReg) {
358            BaseReg = TmpReg;
359          } else {
360            assert (!IndexReg && "BaseReg/IndexReg already set!");
361            IndexReg = TmpReg;
362            Scale = 1;
363          }
364        }
365        break;
366      }
367      PrevState = CurrState;
368    }
369    void onMinus() {
370      IntelExprState CurrState = State;
371      switch (State) {
372      default:
373        State = IES_ERROR;
374        break;
375      case IES_PLUS:
376      case IES_NOT:
377      case IES_MULTIPLY:
378      case IES_DIVIDE:
379      case IES_LPAREN:
380      case IES_RPAREN:
381      case IES_LBRAC:
382      case IES_RBRAC:
383      case IES_INTEGER:
384      case IES_REGISTER:
385        State = IES_MINUS;
386        // Only push the minus operator if it is not a unary operator.
387        if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
388              CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
389              CurrState == IES_LPAREN || CurrState == IES_LBRAC))
390          IC.pushOperator(IC_MINUS);
391        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
392          // If we already have a BaseReg, then assume this is the IndexReg with
393          // a scale of 1.
394          if (!BaseReg) {
395            BaseReg = TmpReg;
396          } else {
397            assert (!IndexReg && "BaseReg/IndexReg already set!");
398            IndexReg = TmpReg;
399            Scale = 1;
400          }
401        }
402        break;
403      }
404      PrevState = CurrState;
405    }
406    void onNot() {
407      IntelExprState CurrState = State;
408      switch (State) {
409      default:
410        State = IES_ERROR;
411        break;
412      case IES_PLUS:
413      case IES_NOT:
414        State = IES_NOT;
415        break;
416      }
417      PrevState = CurrState;
418    }
419    void onRegister(unsigned Reg) {
420      IntelExprState CurrState = State;
421      switch (State) {
422      default:
423        State = IES_ERROR;
424        break;
425      case IES_PLUS:
426      case IES_LPAREN:
427        State = IES_REGISTER;
428        TmpReg = Reg;
429        IC.pushOperand(IC_REGISTER);
430        break;
431      case IES_MULTIPLY:
432        // Index Register - Scale * Register
433        if (PrevState == IES_INTEGER) {
434          assert (!IndexReg && "IndexReg already set!");
435          State = IES_REGISTER;
436          IndexReg = Reg;
437          // Get the scale and replace the 'Scale * Register' with '0'.
438          Scale = IC.popOperand();
439          IC.pushOperand(IC_IMM);
440          IC.popOperator();
441        } else {
442          State = IES_ERROR;
443        }
444        break;
445      }
446      PrevState = CurrState;
447    }
448    void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
449      PrevState = State;
450      switch (State) {
451      default:
452        State = IES_ERROR;
453        break;
454      case IES_PLUS:
455      case IES_MINUS:
456      case IES_NOT:
457        State = IES_INTEGER;
458        Sym = SymRef;
459        SymName = SymRefName;
460        IC.pushOperand(IC_IMM);
461        break;
462      }
463    }
464    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
465      IntelExprState CurrState = State;
466      switch (State) {
467      default:
468        State = IES_ERROR;
469        break;
470      case IES_PLUS:
471      case IES_MINUS:
472      case IES_NOT:
473      case IES_OR:
474      case IES_AND:
475      case IES_LSHIFT:
476      case IES_RSHIFT:
477      case IES_DIVIDE:
478      case IES_MULTIPLY:
479      case IES_LPAREN:
480        State = IES_INTEGER;
481        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
482          // Index Register - Register * Scale
483          assert (!IndexReg && "IndexReg already set!");
484          IndexReg = TmpReg;
485          Scale = TmpInt;
486          if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
487            ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
488            return true;
489          }
490          // Get the scale and replace the 'Register * Scale' with '0'.
491          IC.popOperator();
492        } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
493                    PrevState == IES_OR || PrevState == IES_AND ||
494                    PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
495                    PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
496                    PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
497                    PrevState == IES_NOT) &&
498                   CurrState == IES_MINUS) {
499          // Unary minus.  No need to pop the minus operand because it was never
500          // pushed.
501          IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
502        } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
503                    PrevState == IES_OR || PrevState == IES_AND ||
504                    PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
505                    PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
506                    PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
507                    PrevState == IES_NOT) &&
508                   CurrState == IES_NOT) {
509          // Unary not.  No need to pop the not operand because it was never
510          // pushed.
511          IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
512        } else {
513          IC.pushOperand(IC_IMM, TmpInt);
514        }
515        break;
516      }
517      PrevState = CurrState;
518      return false;
519    }
520    void onStar() {
521      PrevState = State;
522      switch (State) {
523      default:
524        State = IES_ERROR;
525        break;
526      case IES_INTEGER:
527      case IES_REGISTER:
528      case IES_RPAREN:
529        State = IES_MULTIPLY;
530        IC.pushOperator(IC_MULTIPLY);
531        break;
532      }
533    }
534    void onDivide() {
535      PrevState = State;
536      switch (State) {
537      default:
538        State = IES_ERROR;
539        break;
540      case IES_INTEGER:
541      case IES_RPAREN:
542        State = IES_DIVIDE;
543        IC.pushOperator(IC_DIVIDE);
544        break;
545      }
546    }
547    void onLBrac() {
548      PrevState = State;
549      switch (State) {
550      default:
551        State = IES_ERROR;
552        break;
553      case IES_RBRAC:
554        State = IES_PLUS;
555        IC.pushOperator(IC_PLUS);
556        break;
557      }
558    }
559    void onRBrac() {
560      IntelExprState CurrState = State;
561      switch (State) {
562      default:
563        State = IES_ERROR;
564        break;
565      case IES_INTEGER:
566      case IES_REGISTER:
567      case IES_RPAREN:
568        State = IES_RBRAC;
569        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
570          // If we already have a BaseReg, then assume this is the IndexReg with
571          // a scale of 1.
572          if (!BaseReg) {
573            BaseReg = TmpReg;
574          } else {
575            assert (!IndexReg && "BaseReg/IndexReg already set!");
576            IndexReg = TmpReg;
577            Scale = 1;
578          }
579        }
580        break;
581      }
582      PrevState = CurrState;
583    }
584    void onLParen() {
585      IntelExprState CurrState = State;
586      switch (State) {
587      default:
588        State = IES_ERROR;
589        break;
590      case IES_PLUS:
591      case IES_MINUS:
592      case IES_NOT:
593      case IES_OR:
594      case IES_AND:
595      case IES_LSHIFT:
596      case IES_RSHIFT:
597      case IES_MULTIPLY:
598      case IES_DIVIDE:
599      case IES_LPAREN:
600        // FIXME: We don't handle this type of unary minus or not, yet.
601        if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
602            PrevState == IES_OR || PrevState == IES_AND ||
603            PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
604            PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
605            PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
606            PrevState == IES_NOT) &&
607            (CurrState == IES_MINUS || CurrState == IES_NOT)) {
608          State = IES_ERROR;
609          break;
610        }
611        State = IES_LPAREN;
612        IC.pushOperator(IC_LPAREN);
613        break;
614      }
615      PrevState = CurrState;
616    }
617    void onRParen() {
618      PrevState = State;
619      switch (State) {
620      default:
621        State = IES_ERROR;
622        break;
623      case IES_INTEGER:
624      case IES_REGISTER:
625      case IES_RPAREN:
626        State = IES_RPAREN;
627        IC.pushOperator(IC_RPAREN);
628        break;
629      }
630    }
631  };
632
633  MCAsmParser &getParser() const { return Parser; }
634
635  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
636
637  bool Error(SMLoc L, const Twine &Msg,
638             ArrayRef<SMRange> Ranges = None,
639             bool MatchingInlineAsm = false) {
640    if (MatchingInlineAsm) return true;
641    return Parser.Error(L, Msg, Ranges);
642  }
643
644  bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
645          ArrayRef<SMRange> Ranges = None,
646          bool MatchingInlineAsm = false) {
647      Parser.eatToEndOfStatement();
648      return Error(L, Msg, Ranges, MatchingInlineAsm);
649  }
650
651  std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
652    Error(Loc, Msg);
653    return nullptr;
654  }
655
656  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
658  std::unique_ptr<X86Operand> ParseOperand();
659  std::unique_ptr<X86Operand> ParseATTOperand();
660  std::unique_ptr<X86Operand> ParseIntelOperand();
661  std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663  std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664  std::unique_ptr<X86Operand>
665  ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666  std::unique_ptr<X86Operand>
667  ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
669  std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
670                                                       SMLoc Start,
671                                                       int64_t ImmDisp,
672                                                       unsigned Size);
673  bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
674                            InlineAsmIdentifierInfo &Info,
675                            bool IsUnevaluatedOperand, SMLoc &End);
676
677  std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
678
679  std::unique_ptr<X86Operand>
680  CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
681                        unsigned IndexReg, unsigned Scale, SMLoc Start,
682                        SMLoc End, unsigned Size, StringRef Identifier,
683                        InlineAsmIdentifierInfo &Info);
684
685  bool ParseDirectiveWord(unsigned Size, SMLoc L);
686  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
687
688  bool processInstruction(MCInst &Inst, const OperandVector &Ops);
689
690  /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691  /// instrumentation around Inst.
692  void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
693
694  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695                               OperandVector &Operands, MCStreamer &Out,
696                               unsigned &ErrorInfo,
697                               bool MatchingInlineAsm) override;
698
699  /// doSrcDstMatch - Returns true if operands are matching in their
700  /// word size (%si and %di, %esi and %edi, etc.). Order depends on
701  /// the parsing mode (Intel vs. AT&T).
702  bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
703
704  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
705  /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
706  /// \return \c true if no parsing errors occurred, \c false otherwise.
707  bool HandleAVX512Operand(OperandVector &Operands,
708                           const MCParsedAsmOperand &Op);
709
710  bool is64BitMode() const {
711    // FIXME: Can tablegen auto-generate this?
712    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
713  }
714  bool is32BitMode() const {
715    // FIXME: Can tablegen auto-generate this?
716    return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
717  }
718  bool is16BitMode() const {
719    // FIXME: Can tablegen auto-generate this?
720    return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
721  }
722  void SwitchMode(uint64_t mode) {
723    uint64_t oldMode = STI.getFeatureBits() &
724        (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
725    unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
726    setAvailableFeatures(FB);
727    assert(mode == (STI.getFeatureBits() &
728                    (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
729  }
730
731  bool isParsingIntelSyntax() {
732    return getParser().getAssemblerDialect();
733  }
734
735  /// @name Auto-generated Matcher Functions
736  /// {
737
738#define GET_ASSEMBLER_HEADER
739#include "X86GenAsmMatcher.inc"
740
741  /// }
742
743public:
744  X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
745               const MCInstrInfo &mii,
746               const MCTargetOptions &Options)
747      : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
748        InstInfo(nullptr) {
749
750    // Initialize the set of available features.
751    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
752    Instrumentation.reset(
753        CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
754  }
755
756  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
757
758  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
759                        SMLoc NameLoc, OperandVector &Operands) override;
760
761  bool ParseDirective(AsmToken DirectiveID) override;
762};
763} // end anonymous namespace
764
765/// @name Auto-generated Match Functions
766/// {
767
768static unsigned MatchRegisterName(StringRef Name);
769
770/// }
771
772static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
773                                    StringRef &ErrMsg) {
774  // If we have both a base register and an index register make sure they are
775  // both 64-bit or 32-bit registers.
776  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
777  if (BaseReg != 0 && IndexReg != 0) {
778    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
779        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
780         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
781        IndexReg != X86::RIZ) {
782      ErrMsg = "base register is 64-bit, but index register is not";
783      return true;
784    }
785    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
786        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
787         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
788        IndexReg != X86::EIZ){
789      ErrMsg = "base register is 32-bit, but index register is not";
790      return true;
791    }
792    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
793      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
794          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
795        ErrMsg = "base register is 16-bit, but index register is not";
796        return true;
797      }
798      if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
799           IndexReg != X86::SI && IndexReg != X86::DI) ||
800          ((BaseReg == X86::SI || BaseReg == X86::DI) &&
801           IndexReg != X86::BX && IndexReg != X86::BP)) {
802        ErrMsg = "invalid 16-bit base/index register combination";
803        return true;
804      }
805    }
806  }
807  return false;
808}
809
810bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
811{
812  // Return true and let a normal complaint about bogus operands happen.
813  if (!Op1.isMem() || !Op2.isMem())
814    return true;
815
816  // Actually these might be the other way round if Intel syntax is
817  // being used. It doesn't matter.
818  unsigned diReg = Op1.Mem.BaseReg;
819  unsigned siReg = Op2.Mem.BaseReg;
820
821  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
822    return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
823  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
824    return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
825  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
826    return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
827  // Again, return true and let another error happen.
828  return true;
829}
830
831bool X86AsmParser::ParseRegister(unsigned &RegNo,
832                                 SMLoc &StartLoc, SMLoc &EndLoc) {
833  RegNo = 0;
834  const AsmToken &PercentTok = Parser.getTok();
835  StartLoc = PercentTok.getLoc();
836
837  // If we encounter a %, ignore it. This code handles registers with and
838  // without the prefix, unprefixed registers can occur in cfi directives.
839  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
840    Parser.Lex(); // Eat percent token.
841
842  const AsmToken &Tok = Parser.getTok();
843  EndLoc = Tok.getEndLoc();
844
845  if (Tok.isNot(AsmToken::Identifier)) {
846    if (isParsingIntelSyntax()) return true;
847    return Error(StartLoc, "invalid register name",
848                 SMRange(StartLoc, EndLoc));
849  }
850
851  RegNo = MatchRegisterName(Tok.getString());
852
853  // If the match failed, try the register name as lowercase.
854  if (RegNo == 0)
855    RegNo = MatchRegisterName(Tok.getString().lower());
856
857  if (!is64BitMode()) {
858    // FIXME: This should be done using Requires<Not64BitMode> and
859    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
860    // checked.
861    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
862    // REX prefix.
863    if (RegNo == X86::RIZ ||
864        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
865        X86II::isX86_64NonExtLowByteReg(RegNo) ||
866        X86II::isX86_64ExtendedReg(RegNo))
867      return Error(StartLoc, "register %"
868                   + Tok.getString() + " is only available in 64-bit mode",
869                   SMRange(StartLoc, EndLoc));
870  }
871
872  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
873  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
874    RegNo = X86::ST0;
875    Parser.Lex(); // Eat 'st'
876
877    // Check to see if we have '(4)' after %st.
878    if (getLexer().isNot(AsmToken::LParen))
879      return false;
880    // Lex the paren.
881    getParser().Lex();
882
883    const AsmToken &IntTok = Parser.getTok();
884    if (IntTok.isNot(AsmToken::Integer))
885      return Error(IntTok.getLoc(), "expected stack index");
886    switch (IntTok.getIntVal()) {
887    case 0: RegNo = X86::ST0; break;
888    case 1: RegNo = X86::ST1; break;
889    case 2: RegNo = X86::ST2; break;
890    case 3: RegNo = X86::ST3; break;
891    case 4: RegNo = X86::ST4; break;
892    case 5: RegNo = X86::ST5; break;
893    case 6: RegNo = X86::ST6; break;
894    case 7: RegNo = X86::ST7; break;
895    default: return Error(IntTok.getLoc(), "invalid stack index");
896    }
897
898    if (getParser().Lex().isNot(AsmToken::RParen))
899      return Error(Parser.getTok().getLoc(), "expected ')'");
900
901    EndLoc = Parser.getTok().getEndLoc();
902    Parser.Lex(); // Eat ')'
903    return false;
904  }
905
906  EndLoc = Parser.getTok().getEndLoc();
907
908  // If this is "db[0-7]", match it as an alias
909  // for dr[0-7].
910  if (RegNo == 0 && Tok.getString().size() == 3 &&
911      Tok.getString().startswith("db")) {
912    switch (Tok.getString()[2]) {
913    case '0': RegNo = X86::DR0; break;
914    case '1': RegNo = X86::DR1; break;
915    case '2': RegNo = X86::DR2; break;
916    case '3': RegNo = X86::DR3; break;
917    case '4': RegNo = X86::DR4; break;
918    case '5': RegNo = X86::DR5; break;
919    case '6': RegNo = X86::DR6; break;
920    case '7': RegNo = X86::DR7; break;
921    }
922
923    if (RegNo != 0) {
924      EndLoc = Parser.getTok().getEndLoc();
925      Parser.Lex(); // Eat it.
926      return false;
927    }
928  }
929
930  if (RegNo == 0) {
931    if (isParsingIntelSyntax()) return true;
932    return Error(StartLoc, "invalid register name",
933                 SMRange(StartLoc, EndLoc));
934  }
935
936  Parser.Lex(); // Eat identifier token.
937  return false;
938}
939
940std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
941  unsigned basereg =
942    is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
943  const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
944  return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
945                               /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
946}
947
948std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
949  unsigned basereg =
950    is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
951  const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
952  return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
953                               /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
954}
955
956std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
957  if (isParsingIntelSyntax())
958    return ParseIntelOperand();
959  return ParseATTOperand();
960}
961
962/// getIntelMemOperandSize - Return intel memory operand size.
963static unsigned getIntelMemOperandSize(StringRef OpStr) {
964  unsigned Size = StringSwitch<unsigned>(OpStr)
965    .Cases("BYTE", "byte", 8)
966    .Cases("WORD", "word", 16)
967    .Cases("DWORD", "dword", 32)
968    .Cases("QWORD", "qword", 64)
969    .Cases("XWORD", "xword", 80)
970    .Cases("XMMWORD", "xmmword", 128)
971    .Cases("YMMWORD", "ymmword", 256)
972    .Cases("ZMMWORD", "zmmword", 512)
973    .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
974    .Default(0);
975  return Size;
976}
977
978std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
979    unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
980    unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
981    InlineAsmIdentifierInfo &Info) {
982  // If this is not a VarDecl then assume it is a FuncDecl or some other label
983  // reference.  We need an 'r' constraint here, so we need to create register
984  // operand to ensure proper matching.  Just pick a GPR based on the size of
985  // a pointer.
986  if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
987    unsigned RegNo =
988        is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
989    return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
990                                 SMLoc(), Identifier, Info.OpDecl);
991  }
992
993  // We either have a direct symbol reference, or an offset from a symbol.  The
994  // parser always puts the symbol on the LHS, so look there for size
995  // calculation purposes.
996  const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
997  bool IsSymRef =
998      isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
999  if (IsSymRef) {
1000    if (!Size) {
1001      Size = Info.Type * 8; // Size is in terms of bits in this context.
1002      if (Size)
1003        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1004                                                    /*Len=*/0, Size));
1005    }
1006  }
1007
1008  // When parsing inline assembly we set the base register to a non-zero value
1009  // if we don't know the actual value at this time.  This is necessary to
1010  // get the matching correct in some cases.
1011  BaseReg = BaseReg ? BaseReg : 1;
1012  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1013                               End, Size, Identifier, Info.OpDecl);
1014}
1015
1016static void
1017RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1018                           StringRef SymName, int64_t ImmDisp,
1019                           int64_t FinalImmDisp, SMLoc &BracLoc,
1020                           SMLoc &StartInBrac, SMLoc &End) {
1021  // Remove the '[' and ']' from the IR string.
1022  AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1023  AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1024
1025  // If ImmDisp is non-zero, then we parsed a displacement before the
1026  // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1027  // If ImmDisp doesn't match the displacement computed by the state machine
1028  // then we have an additional displacement in the bracketed expression.
1029  if (ImmDisp != FinalImmDisp) {
1030    if (ImmDisp) {
1031      // We have an immediate displacement before the bracketed expression.
1032      // Adjust this to match the final immediate displacement.
1033      bool Found = false;
1034      for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1035             E = AsmRewrites->end(); I != E; ++I) {
1036        if ((*I).Loc.getPointer() > BracLoc.getPointer())
1037          continue;
1038        if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1039          assert (!Found && "ImmDisp already rewritten.");
1040          (*I).Kind = AOK_Imm;
1041          (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1042          (*I).Val = FinalImmDisp;
1043          Found = true;
1044          break;
1045        }
1046      }
1047      assert (Found && "Unable to rewrite ImmDisp.");
1048      (void)Found;
1049    } else {
1050      // We have a symbolic and an immediate displacement, but no displacement
1051      // before the bracketed expression.  Put the immediate displacement
1052      // before the bracketed expression.
1053      AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1054    }
1055  }
1056  // Remove all the ImmPrefix rewrites within the brackets.
1057  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1058         E = AsmRewrites->end(); I != E; ++I) {
1059    if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1060      continue;
1061    if ((*I).Kind == AOK_ImmPrefix)
1062      (*I).Kind = AOK_Delete;
1063  }
1064  const char *SymLocPtr = SymName.data();
1065  // Skip everything before the symbol.
1066  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1067    assert(Len > 0 && "Expected a non-negative length.");
1068    AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1069  }
1070  // Skip everything after the symbol.
1071  if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1072    SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1073    assert(Len > 0 && "Expected a non-negative length.");
1074    AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1075  }
1076}
1077
1078bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1079  const AsmToken &Tok = Parser.getTok();
1080
1081  bool Done = false;
1082  while (!Done) {
1083    bool UpdateLocLex = true;
1084
1085    // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1086    // identifier.  Don't try an parse it as a register.
1087    if (Tok.getString().startswith("."))
1088      break;
1089
1090    // If we're parsing an immediate expression, we don't expect a '['.
1091    if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1092      break;
1093
1094    AsmToken::TokenKind TK = getLexer().getKind();
1095    switch (TK) {
1096    default: {
1097      if (SM.isValidEndState()) {
1098        Done = true;
1099        break;
1100      }
1101      return Error(Tok.getLoc(), "unknown token in expression");
1102    }
1103    case AsmToken::EndOfStatement: {
1104      Done = true;
1105      break;
1106    }
1107    case AsmToken::String:
1108    case AsmToken::Identifier: {
1109      // This could be a register or a symbolic displacement.
1110      unsigned TmpReg;
1111      const MCExpr *Val;
1112      SMLoc IdentLoc = Tok.getLoc();
1113      StringRef Identifier = Tok.getString();
1114      if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1115        SM.onRegister(TmpReg);
1116        UpdateLocLex = false;
1117        break;
1118      } else {
1119        if (!isParsingInlineAsm()) {
1120          if (getParser().parsePrimaryExpr(Val, End))
1121            return Error(Tok.getLoc(), "Unexpected identifier!");
1122        } else {
1123          // This is a dot operator, not an adjacent identifier.
1124          if (Identifier.find('.') != StringRef::npos) {
1125            return false;
1126          } else {
1127            InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1128            if (ParseIntelIdentifier(Val, Identifier, Info,
1129                                     /*Unevaluated=*/false, End))
1130              return true;
1131          }
1132        }
1133        SM.onIdentifierExpr(Val, Identifier);
1134        UpdateLocLex = false;
1135        break;
1136      }
1137      return Error(Tok.getLoc(), "Unexpected identifier!");
1138    }
1139    case AsmToken::Integer: {
1140      StringRef ErrMsg;
1141      if (isParsingInlineAsm() && SM.getAddImmPrefix())
1142        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1143                                                    Tok.getLoc()));
1144      // Look for 'b' or 'f' following an Integer as a directional label
1145      SMLoc Loc = getTok().getLoc();
1146      int64_t IntVal = getTok().getIntVal();
1147      End = consumeToken();
1148      UpdateLocLex = false;
1149      if (getLexer().getKind() == AsmToken::Identifier) {
1150        StringRef IDVal = getTok().getString();
1151        if (IDVal == "f" || IDVal == "b") {
1152          MCSymbol *Sym =
1153              getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1154          MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1155          const MCExpr *Val =
1156	    MCSymbolRefExpr::Create(Sym, Variant, getContext());
1157          if (IDVal == "b" && Sym->isUndefined())
1158            return Error(Loc, "invalid reference to undefined symbol");
1159          StringRef Identifier = Sym->getName();
1160          SM.onIdentifierExpr(Val, Identifier);
1161          End = consumeToken();
1162        } else {
1163          if (SM.onInteger(IntVal, ErrMsg))
1164            return Error(Loc, ErrMsg);
1165        }
1166      } else {
1167        if (SM.onInteger(IntVal, ErrMsg))
1168          return Error(Loc, ErrMsg);
1169      }
1170      break;
1171    }
1172    case AsmToken::Plus:    SM.onPlus(); break;
1173    case AsmToken::Minus:   SM.onMinus(); break;
1174    case AsmToken::Tilde:   SM.onNot(); break;
1175    case AsmToken::Star:    SM.onStar(); break;
1176    case AsmToken::Slash:   SM.onDivide(); break;
1177    case AsmToken::Pipe:    SM.onOr(); break;
1178    case AsmToken::Amp:     SM.onAnd(); break;
1179    case AsmToken::LessLess:
1180                            SM.onLShift(); break;
1181    case AsmToken::GreaterGreater:
1182                            SM.onRShift(); break;
1183    case AsmToken::LBrac:   SM.onLBrac(); break;
1184    case AsmToken::RBrac:   SM.onRBrac(); break;
1185    case AsmToken::LParen:  SM.onLParen(); break;
1186    case AsmToken::RParen:  SM.onRParen(); break;
1187    }
1188    if (SM.hadError())
1189      return Error(Tok.getLoc(), "unknown token in expression");
1190
1191    if (!Done && UpdateLocLex)
1192      End = consumeToken();
1193  }
1194  return false;
1195}
1196
1197std::unique_ptr<X86Operand>
1198X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1199                                       int64_t ImmDisp, unsigned Size) {
1200  const AsmToken &Tok = Parser.getTok();
1201  SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1202  if (getLexer().isNot(AsmToken::LBrac))
1203    return ErrorOperand(BracLoc, "Expected '[' token!");
1204  Parser.Lex(); // Eat '['
1205
1206  SMLoc StartInBrac = Tok.getLoc();
1207  // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
1208  // may have already parsed an immediate displacement before the bracketed
1209  // expression.
1210  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1211  if (ParseIntelExpression(SM, End))
1212    return nullptr;
1213
1214  const MCExpr *Disp = nullptr;
1215  if (const MCExpr *Sym = SM.getSym()) {
1216    // A symbolic displacement.
1217    Disp = Sym;
1218    if (isParsingInlineAsm())
1219      RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1220                                 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1221                                 End);
1222  }
1223
1224  if (SM.getImm() || !Disp) {
1225    const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1226    if (Disp)
1227      Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1228    else
1229      Disp = Imm;  // An immediate displacement only.
1230  }
1231
1232  // Parse struct field access.  Intel requires a dot, but MSVC doesn't.  MSVC
1233  // will in fact do global lookup the field name inside all global typedefs,
1234  // but we don't emulate that.
1235  if (Tok.getString().find('.') != StringRef::npos) {
1236    const MCExpr *NewDisp;
1237    if (ParseIntelDotOperator(Disp, NewDisp))
1238      return nullptr;
1239
1240    End = Tok.getEndLoc();
1241    Parser.Lex();  // Eat the field.
1242    Disp = NewDisp;
1243  }
1244
1245  int BaseReg = SM.getBaseReg();
1246  int IndexReg = SM.getIndexReg();
1247  int Scale = SM.getScale();
1248  if (!isParsingInlineAsm()) {
1249    // handle [-42]
1250    if (!BaseReg && !IndexReg) {
1251      if (!SegReg)
1252        return X86Operand::CreateMem(Disp, Start, End, Size);
1253      else
1254        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1255    }
1256    StringRef ErrMsg;
1257    if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1258      Error(StartInBrac, ErrMsg);
1259      return nullptr;
1260    }
1261    return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1262                                 End, Size);
1263  }
1264
1265  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1266  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1267                               End, Size, SM.getSymName(), Info);
1268}
1269
1270// Inline assembly may use variable names with namespace alias qualifiers.
1271bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1272                                        StringRef &Identifier,
1273                                        InlineAsmIdentifierInfo &Info,
1274                                        bool IsUnevaluatedOperand, SMLoc &End) {
1275  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1276  Val = nullptr;
1277
1278  StringRef LineBuf(Identifier.data());
1279  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1280
1281  const AsmToken &Tok = Parser.getTok();
1282
1283  // Advance the token stream until the end of the current token is
1284  // after the end of what the frontend claimed.
1285  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1286  while (true) {
1287    End = Tok.getEndLoc();
1288    getLexer().Lex();
1289
1290    assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1291    if (End.getPointer() == EndPtr) break;
1292  }
1293
1294  // Create the symbol reference.
1295  Identifier = LineBuf;
1296  MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1297  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1298  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1299  return false;
1300}
1301
1302/// \brief Parse intel style segment override.
1303std::unique_ptr<X86Operand>
1304X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1305                                        unsigned Size) {
1306  assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1307  const AsmToken &Tok = Parser.getTok(); // Eat colon.
1308  if (Tok.isNot(AsmToken::Colon))
1309    return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1310  Parser.Lex(); // Eat ':'
1311
1312  int64_t ImmDisp = 0;
1313  if (getLexer().is(AsmToken::Integer)) {
1314    ImmDisp = Tok.getIntVal();
1315    AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1316
1317    if (isParsingInlineAsm())
1318      InstInfo->AsmRewrites->push_back(
1319          AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1320
1321    if (getLexer().isNot(AsmToken::LBrac)) {
1322      // An immediate following a 'segment register', 'colon' token sequence can
1323      // be followed by a bracketed expression.  If it isn't we know we have our
1324      // final segment override.
1325      const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1326      return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1327                                   /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1328                                   Size);
1329    }
1330  }
1331
1332  if (getLexer().is(AsmToken::LBrac))
1333    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1334
1335  const MCExpr *Val;
1336  SMLoc End;
1337  if (!isParsingInlineAsm()) {
1338    if (getParser().parsePrimaryExpr(Val, End))
1339      return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1340
1341    return X86Operand::CreateMem(Val, Start, End, Size);
1342  }
1343
1344  InlineAsmIdentifierInfo Info;
1345  StringRef Identifier = Tok.getString();
1346  if (ParseIntelIdentifier(Val, Identifier, Info,
1347                           /*Unevaluated=*/false, End))
1348    return nullptr;
1349  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1350                               /*Scale=*/1, Start, End, Size, Identifier, Info);
1351}
1352
1353/// ParseIntelMemOperand - Parse intel style memory operand.
1354std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1355                                                               SMLoc Start,
1356                                                               unsigned Size) {
1357  const AsmToken &Tok = Parser.getTok();
1358  SMLoc End;
1359
1360  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1361  if (getLexer().is(AsmToken::LBrac))
1362    return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1363  assert(ImmDisp == 0);
1364
1365  const MCExpr *Val;
1366  if (!isParsingInlineAsm()) {
1367    if (getParser().parsePrimaryExpr(Val, End))
1368      return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1369
1370    return X86Operand::CreateMem(Val, Start, End, Size);
1371  }
1372
1373  InlineAsmIdentifierInfo Info;
1374  StringRef Identifier = Tok.getString();
1375  if (ParseIntelIdentifier(Val, Identifier, Info,
1376                           /*Unevaluated=*/false, End))
1377    return nullptr;
1378
1379  if (!getLexer().is(AsmToken::LBrac))
1380    return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1381                                 /*Scale=*/1, Start, End, Size, Identifier, Info);
1382
1383  Parser.Lex(); // Eat '['
1384
1385  // Parse Identifier [ ImmDisp ]
1386  IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1387                           /*AddImmPrefix=*/false);
1388  if (ParseIntelExpression(SM, End))
1389    return nullptr;
1390
1391  if (SM.getSym()) {
1392    Error(Start, "cannot use more than one symbol in memory operand");
1393    return nullptr;
1394  }
1395  if (SM.getBaseReg()) {
1396    Error(Start, "cannot use base register with variable reference");
1397    return nullptr;
1398  }
1399  if (SM.getIndexReg()) {
1400    Error(Start, "cannot use index register with variable reference");
1401    return nullptr;
1402  }
1403
1404  const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1405  // BaseReg is non-zero to avoid assertions.  In the context of inline asm,
1406  // we're pointing to a local variable in memory, so the base register is
1407  // really the frame or stack pointer.
1408  return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1409                               /*Scale=*/1, Start, End, Size, Identifier,
1410                               Info.OpDecl);
1411}
1412
1413/// Parse the '.' operator.
1414bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1415                                                const MCExpr *&NewDisp) {
1416  const AsmToken &Tok = Parser.getTok();
1417  int64_t OrigDispVal, DotDispVal;
1418
1419  // FIXME: Handle non-constant expressions.
1420  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1421    OrigDispVal = OrigDisp->getValue();
1422  else
1423    return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1424
1425  // Drop the optional '.'.
1426  StringRef DotDispStr = Tok.getString();
1427  if (DotDispStr.startswith("."))
1428    DotDispStr = DotDispStr.drop_front(1);
1429
1430  // .Imm gets lexed as a real.
1431  if (Tok.is(AsmToken::Real)) {
1432    APInt DotDisp;
1433    DotDispStr.getAsInteger(10, DotDisp);
1434    DotDispVal = DotDisp.getZExtValue();
1435  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1436    unsigned DotDisp;
1437    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1438    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1439                                           DotDisp))
1440      return Error(Tok.getLoc(), "Unable to lookup field reference!");
1441    DotDispVal = DotDisp;
1442  } else
1443    return Error(Tok.getLoc(), "Unexpected token type!");
1444
1445  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1446    SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1447    unsigned Len = DotDispStr.size();
1448    unsigned Val = OrigDispVal + DotDispVal;
1449    InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1450                                                Val));
1451  }
1452
1453  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1454  return false;
1455}
1456
1457/// Parse the 'offset' operator.  This operator is used to specify the
1458/// location rather then the content of a variable.
1459std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1460  const AsmToken &Tok = Parser.getTok();
1461  SMLoc OffsetOfLoc = Tok.getLoc();
1462  Parser.Lex(); // Eat offset.
1463
1464  const MCExpr *Val;
1465  InlineAsmIdentifierInfo Info;
1466  SMLoc Start = Tok.getLoc(), End;
1467  StringRef Identifier = Tok.getString();
1468  if (ParseIntelIdentifier(Val, Identifier, Info,
1469                           /*Unevaluated=*/false, End))
1470    return nullptr;
1471
1472  // Don't emit the offset operator.
1473  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1474
1475  // The offset operator will have an 'r' constraint, thus we need to create
1476  // register operand to ensure proper matching.  Just pick a GPR based on
1477  // the size of a pointer.
1478  unsigned RegNo =
1479      is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1480  return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1481                               OffsetOfLoc, Identifier, Info.OpDecl);
1482}
1483
/// Selects which of the Intel 'LENGTH', 'SIZE' and 'TYPE' operators is being
/// parsed.
enum IntelOperatorKind {
  IOK_LENGTH = 0, // The 'LENGTH' operator.
  IOK_SIZE = 1,   // The 'SIZE' operator.
  IOK_TYPE = 2    // The 'TYPE' operator.
};
1489
1490/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
1491/// returns the number of elements in an array.  It returns the value 1 for
1492/// non-array variables.  The SIZE operator returns the size of a C or C++
1493/// variable.  A variable's size is the product of its LENGTH and TYPE.  The
1494/// TYPE operator returns the size of a C or C++ type or variable. If the
1495/// variable is an array, TYPE returns the size of a single element.
1496std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1497  const AsmToken &Tok = Parser.getTok();
1498  SMLoc TypeLoc = Tok.getLoc();
1499  Parser.Lex(); // Eat operator.
1500
1501  const MCExpr *Val = nullptr;
1502  InlineAsmIdentifierInfo Info;
1503  SMLoc Start = Tok.getLoc(), End;
1504  StringRef Identifier = Tok.getString();
1505  if (ParseIntelIdentifier(Val, Identifier, Info,
1506                           /*Unevaluated=*/true, End))
1507    return nullptr;
1508
1509  if (!Info.OpDecl)
1510    return ErrorOperand(Start, "unable to lookup expression");
1511
1512  unsigned CVal = 0;
1513  switch(OpKind) {
1514  default: llvm_unreachable("Unexpected operand kind!");
1515  case IOK_LENGTH: CVal = Info.Length; break;
1516  case IOK_SIZE: CVal = Info.Size; break;
1517  case IOK_TYPE: CVal = Info.Type; break;
1518  }
1519
1520  // Rewrite the type operator and the C or C++ type or variable in terms of an
1521  // immediate.  E.g. TYPE foo -> $$4
1522  unsigned Len = End.getPointer() - TypeLoc.getPointer();
1523  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1524
1525  const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1526  return X86Operand::CreateImm(Imm, Start, End);
1527}
1528
1529std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1530  const AsmToken &Tok = Parser.getTok();
1531  SMLoc Start, End;
1532
1533  // Offset, length, type and size operators.
1534  if (isParsingInlineAsm()) {
1535    StringRef AsmTokStr = Tok.getString();
1536    if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1537      return ParseIntelOffsetOfOperator();
1538    if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1539      return ParseIntelOperator(IOK_LENGTH);
1540    if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1541      return ParseIntelOperator(IOK_SIZE);
1542    if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1543      return ParseIntelOperator(IOK_TYPE);
1544  }
1545
1546  unsigned Size = getIntelMemOperandSize(Tok.getString());
1547  if (Size) {
1548    Parser.Lex(); // Eat operand size (e.g., byte, word).
1549    if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1550      return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1551    Parser.Lex(); // Eat ptr.
1552  }
1553  Start = Tok.getLoc();
1554
1555  // Immediate.
1556  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1557      getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1558    AsmToken StartTok = Tok;
1559    IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1560                             /*AddImmPrefix=*/false);
1561    if (ParseIntelExpression(SM, End))
1562      return nullptr;
1563
1564    int64_t Imm = SM.getImm();
1565    if (isParsingInlineAsm()) {
1566      unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1567      if (StartTok.getString().size() == Len)
1568        // Just add a prefix if this wasn't a complex immediate expression.
1569        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1570      else
1571        // Otherwise, rewrite the complex expression as a single immediate.
1572        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1573    }
1574
1575    if (getLexer().isNot(AsmToken::LBrac)) {
1576      // If a directional label (ie. 1f or 2b) was parsed above from
1577      // ParseIntelExpression() then SM.getSym() was set to a pointer to
1578      // to the MCExpr with the directional local symbol and this is a
1579      // memory operand not an immediate operand.
1580      if (SM.getSym())
1581        return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1582
1583      const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1584      return X86Operand::CreateImm(ImmExpr, Start, End);
1585    }
1586
1587    // Only positive immediates are valid.
1588    if (Imm < 0)
1589      return ErrorOperand(Start, "expected a positive immediate displacement "
1590                          "before bracketed expr.");
1591
1592    // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1593    return ParseIntelMemOperand(Imm, Start, Size);
1594  }
1595
1596  // Register.
1597  unsigned RegNo = 0;
1598  if (!ParseRegister(RegNo, Start, End)) {
1599    // If this is a segment register followed by a ':', then this is the start
1600    // of a segment override, otherwise this is a normal register reference.
1601    if (getLexer().isNot(AsmToken::Colon))
1602      return X86Operand::CreateReg(RegNo, Start, End);
1603
1604    return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1605  }
1606
1607  // Memory operand.
1608  return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1609}
1610
1611std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1612  switch (getLexer().getKind()) {
1613  default:
1614    // Parse a memory operand with no segment register.
1615    return ParseMemOperand(0, Parser.getTok().getLoc());
1616  case AsmToken::Percent: {
1617    // Read the register.
1618    unsigned RegNo;
1619    SMLoc Start, End;
1620    if (ParseRegister(RegNo, Start, End)) return nullptr;
1621    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1622      Error(Start, "%eiz and %riz can only be used as index registers",
1623            SMRange(Start, End));
1624      return nullptr;
1625    }
1626
1627    // If this is a segment register followed by a ':', then this is the start
1628    // of a memory reference, otherwise this is a normal register reference.
1629    if (getLexer().isNot(AsmToken::Colon))
1630      return X86Operand::CreateReg(RegNo, Start, End);
1631
1632    getParser().Lex(); // Eat the colon.
1633    return ParseMemOperand(RegNo, Start);
1634  }
1635  case AsmToken::Dollar: {
1636    // $42 -> immediate.
1637    SMLoc Start = Parser.getTok().getLoc(), End;
1638    Parser.Lex();
1639    const MCExpr *Val;
1640    if (getParser().parseExpression(Val, End))
1641      return nullptr;
1642    return X86Operand::CreateImm(Val, Start, End);
1643  }
1644  }
1645}
1646
1647bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1648                                       const MCParsedAsmOperand &Op) {
1649  if(STI.getFeatureBits() & X86::FeatureAVX512) {
1650    if (getLexer().is(AsmToken::LCurly)) {
1651      // Eat "{" and mark the current place.
1652      const SMLoc consumedToken = consumeToken();
1653      // Distinguish {1to<NUM>} from {%k<NUM>}.
1654      if(getLexer().is(AsmToken::Integer)) {
1655        // Parse memory broadcasting ({1to<NUM>}).
1656        if (getLexer().getTok().getIntVal() != 1)
1657          return !ErrorAndEatStatement(getLexer().getLoc(),
1658                                       "Expected 1to<NUM> at this point");
1659        Parser.Lex();  // Eat "1" of 1to8
1660        if (!getLexer().is(AsmToken::Identifier) ||
1661            !getLexer().getTok().getIdentifier().startswith("to"))
1662          return !ErrorAndEatStatement(getLexer().getLoc(),
1663                                       "Expected 1to<NUM> at this point");
1664        // Recognize only reasonable suffixes.
1665        const char *BroadcastPrimitive =
1666          StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1667            .Case("to8",  "{1to8}")
1668            .Case("to16", "{1to16}")
1669            .Default(nullptr);
1670        if (!BroadcastPrimitive)
1671          return !ErrorAndEatStatement(getLexer().getLoc(),
1672                                       "Invalid memory broadcast primitive.");
1673        Parser.Lex();  // Eat "toN" of 1toN
1674        if (!getLexer().is(AsmToken::RCurly))
1675          return !ErrorAndEatStatement(getLexer().getLoc(),
1676                                       "Expected } at this point");
1677        Parser.Lex();  // Eat "}"
1678        Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1679                                                   consumedToken));
1680        // No AVX512 specific primitives can pass
1681        // after memory broadcasting, so return.
1682        return true;
1683      } else {
1684        // Parse mask register {%k1}
1685        Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1686        if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1687          Operands.push_back(std::move(Op));
1688          if (!getLexer().is(AsmToken::RCurly))
1689            return !ErrorAndEatStatement(getLexer().getLoc(),
1690                                         "Expected } at this point");
1691          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1692
1693          // Parse "zeroing non-masked" semantic {z}
1694          if (getLexer().is(AsmToken::LCurly)) {
1695            Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1696            if (!getLexer().is(AsmToken::Identifier) ||
1697                getLexer().getTok().getIdentifier() != "z")
1698              return !ErrorAndEatStatement(getLexer().getLoc(),
1699                                           "Expected z at this point");
1700            Parser.Lex();  // Eat the z
1701            if (!getLexer().is(AsmToken::RCurly))
1702              return !ErrorAndEatStatement(getLexer().getLoc(),
1703                                           "Expected } at this point");
1704            Parser.Lex();  // Eat the }
1705          }
1706        }
1707      }
1708    }
1709  }
1710  return true;
1711}
1712
/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
/// has already been parsed if present.
///
/// \param SegReg    segment register from an already-consumed "%seg:" prefix,
///                  or 0 if there was none.
/// \param MemStart  source location where the memory operand begins.
/// \returns the memory operand built by X86Operand::CreateMem, or nullptr if
/// parsing or validation failed.
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
                                                          SMLoc MemStart) {

  // We have to disambiguate a parenthesized expression "(4+5)" from the start
  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
  // only way to do this without lookahead is to eat the '(' and see what is
  // after it.
  // The displacement defaults to the constant 0 when none is written.
  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
  if (getLexer().isNot(AsmToken::LParen)) {
    SMLoc ExprEnd;
    if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;

    // After parsing the base expression we could either have a parenthesized
    // memory address or not.  If not, return now.  If so, eat the (.
    if (getLexer().isNot(AsmToken::LParen)) {
      // Unless we have a segment register, treat this as an immediate.
      if (SegReg == 0)
        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
    }

    // Eat the '('.
    Parser.Lex();
  } else {
    // Okay, we have a '('.  We don't know if this is an expression or not, so
    // we have to eat the '(' to see beyond it.
    SMLoc LParenLoc = Parser.getTok().getLoc();
    Parser.Lex(); // Eat the '('.

    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
      // Nothing to do here, fall into the code below with the '(' part of the
      // memory operand consumed.
    } else {
      SMLoc ExprEnd;

      // It must be a parenthesized expression, parse it now.
      if (getParser().parseParenExpression(Disp, ExprEnd))
        return nullptr;

      // After parsing the base expression we could either have a parenthesized
      // memory address or not.  If not, return now.  If so, eat the (.
      if (getLexer().isNot(AsmToken::LParen)) {
        // Unless we have a segment register, treat this as an immediate.
        if (SegReg == 0)
          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
      }

      // Eat the '('.
      Parser.Lex();
    }
  }

  // If we reached here, then we just ate the ( of the memory operand.  Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc IndexLoc, BaseLoc;

  // Optional base register.
  if (getLexer().is(AsmToken::Percent)) {
    SMLoc StartLoc, EndLoc;
    BaseLoc = Parser.getTok().getLoc();
    if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
      Error(StartLoc, "eiz and riz can only be used as index registers",
            SMRange(StartLoc, EndLoc));
      return nullptr;
    }
  }

  if (getLexer().is(AsmToken::Comma)) {
    Parser.Lex(); // Eat the comma.
    IndexLoc = Parser.getTok().getLoc();

    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Note that even though it would be completely consistent to support syntax
    // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().is(AsmToken::Percent)) {
      // The register's source range is not needed here, so one scratch SMLoc
      // receives both the start and the end location.
      SMLoc L;
      if (ParseRegister(IndexReg, L, L)) return nullptr;

      if (getLexer().isNot(AsmToken::RParen)) {
        // Parse the scale amount:
        //  ::= ',' [scale-expression]
        if (getLexer().isNot(AsmToken::Comma)) {
          Error(Parser.getTok().getLoc(),
                "expected comma in scale expression");
          return nullptr;
        }
        Parser.Lex(); // Eat the comma.

        // A trailing comma with no scale expression leaves Scale at 1.
        if (getLexer().isNot(AsmToken::RParen)) {
          SMLoc Loc = Parser.getTok().getLoc();

          int64_t ScaleVal;
          if (getParser().parseAbsoluteExpression(ScaleVal)){
            Error(Loc, "expected scale expression");
            return nullptr;
          }

          // Validate the scale amount.
	  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
              ScaleVal != 1) {
            Error(Loc, "scale factor in 16-bit address must be 1");
            return nullptr;
	  }
          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
            return nullptr;
          }
          Scale = (unsigned)ScaleVal;
        }
      }
    } else if (getLexer().isNot(AsmToken::RParen)) {
      // A scale amount without an index register is parsed and then ignored
      // (with a warning unless it is 1).
      SMLoc Loc = Parser.getTok().getLoc();

      int64_t Value;
      if (getParser().parseAbsoluteExpression(Value))
        return nullptr;

      if (Value != 1)
        Warning(Loc, "scale factor without index register is ignored");
      Scale = 1;
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (getLexer().isNot(AsmToken::RParen)) {
    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
    return nullptr;
  }
  SMLoc MemEnd = Parser.getTok().getEndLoc();
  Parser.Lex(); // Eat the ')'.

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes. Except for DX, which is a special case
  // because an unofficial form of in/out instructions uses it.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
                         BaseReg != X86::SI && BaseReg != X86::DI)) &&
      BaseReg != X86::DX) {
    Error(BaseLoc, "invalid 16-bit base register");
    return nullptr;
  }
  if (BaseReg == 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    Error(IndexLoc, "16-bit memory operand may not include only index register");
    return nullptr;
  }

  // Final base/index combination check; the helper supplies the message.
  StringRef ErrMsg;
  if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
    Error(BaseLoc, ErrMsg);
    return nullptr;
  }

  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
                               MemStart, MemEnd);
}
1878
1879bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1880                                    SMLoc NameLoc, OperandVector &Operands) {
1881  InstInfo = &Info;
1882  StringRef PatchedName = Name;
1883
1884  // FIXME: Hack to recognize setneb as setne.
1885  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1886      PatchedName != "setb" && PatchedName != "setnb")
1887    PatchedName = PatchedName.substr(0, Name.size()-1);
1888
1889  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1890  const MCExpr *ExtraImmOp = nullptr;
1891  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1892      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1893       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1894    bool IsVCMP = PatchedName[0] == 'v';
1895    unsigned SSECCIdx = IsVCMP ? 4 : 3;
1896    unsigned SSEComparisonCode = StringSwitch<unsigned>(
1897      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1898      .Case("eq",       0x00)
1899      .Case("lt",       0x01)
1900      .Case("le",       0x02)
1901      .Case("unord",    0x03)
1902      .Case("neq",      0x04)
1903      .Case("nlt",      0x05)
1904      .Case("nle",      0x06)
1905      .Case("ord",      0x07)
1906      /* AVX only from here */
1907      .Case("eq_uq",    0x08)
1908      .Case("nge",      0x09)
1909      .Case("ngt",      0x0A)
1910      .Case("false",    0x0B)
1911      .Case("neq_oq",   0x0C)
1912      .Case("ge",       0x0D)
1913      .Case("gt",       0x0E)
1914      .Case("true",     0x0F)
1915      .Case("eq_os",    0x10)
1916      .Case("lt_oq",    0x11)
1917      .Case("le_oq",    0x12)
1918      .Case("unord_s",  0x13)
1919      .Case("neq_us",   0x14)
1920      .Case("nlt_uq",   0x15)
1921      .Case("nle_uq",   0x16)
1922      .Case("ord_s",    0x17)
1923      .Case("eq_us",    0x18)
1924      .Case("nge_uq",   0x19)
1925      .Case("ngt_uq",   0x1A)
1926      .Case("false_os", 0x1B)
1927      .Case("neq_os",   0x1C)
1928      .Case("ge_oq",    0x1D)
1929      .Case("gt_oq",    0x1E)
1930      .Case("true_us",  0x1F)
1931      .Default(~0U);
1932    if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1933      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1934                                          getParser().getContext());
1935      if (PatchedName.endswith("ss")) {
1936        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1937      } else if (PatchedName.endswith("sd")) {
1938        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1939      } else if (PatchedName.endswith("ps")) {
1940        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1941      } else {
1942        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1943        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1944      }
1945    }
1946  }
1947
1948  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
1949
1950  if (ExtraImmOp && !isParsingIntelSyntax())
1951    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1952
1953  // Determine whether this is an instruction prefix.
1954  bool isPrefix =
1955    Name == "lock" || Name == "rep" ||
1956    Name == "repe" || Name == "repz" ||
1957    Name == "repne" || Name == "repnz" ||
1958    Name == "rex64" || Name == "data16";
1959
1960
1961  // This does the actual operand parsing.  Don't parse any more if we have a
1962  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1963  // just want to parse the "lock" as the first instruction and the "incl" as
1964  // the next one.
1965  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1966
1967    // Parse '*' modifier.
1968    if (getLexer().is(AsmToken::Star))
1969      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1970
1971    // Read the operands.
1972    while(1) {
1973      if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1974        Operands.push_back(std::move(Op));
1975        if (!HandleAVX512Operand(Operands, *Operands.back()))
1976          return true;
1977      } else {
1978         Parser.eatToEndOfStatement();
1979         return true;
1980      }
1981      // check for comma and eat it
1982      if (getLexer().is(AsmToken::Comma))
1983        Parser.Lex();
1984      else
1985        break;
1986     }
1987
1988    if (getLexer().isNot(AsmToken::EndOfStatement))
1989      return ErrorAndEatStatement(getLexer().getLoc(),
1990                                  "unexpected token in argument list");
1991   }
1992
1993  // Consume the EndOfStatement or the prefix separator Slash
1994  if (getLexer().is(AsmToken::EndOfStatement) ||
1995      (isPrefix && getLexer().is(AsmToken::Slash)))
1996    Parser.Lex();
1997
1998  if (ExtraImmOp && isParsingIntelSyntax())
1999    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2000
2001  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2002  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
2003  // documented form in various unofficial manuals, so a lot of code uses it.
2004  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2005      Operands.size() == 3) {
2006    X86Operand &Op = (X86Operand &)*Operands.back();
2007    if (Op.isMem() && Op.Mem.SegReg == 0 &&
2008        isa<MCConstantExpr>(Op.Mem.Disp) &&
2009        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2010        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2011      SMLoc Loc = Op.getEndLoc();
2012      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2013    }
2014  }
2015  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2016  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2017      Operands.size() == 3) {
2018    X86Operand &Op = (X86Operand &)*Operands[1];
2019    if (Op.isMem() && Op.Mem.SegReg == 0 &&
2020        isa<MCConstantExpr>(Op.Mem.Disp) &&
2021        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2022        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2023      SMLoc Loc = Op.getEndLoc();
2024      Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2025    }
2026  }
2027
2028  // Append default arguments to "ins[bwld]"
2029  if (Name.startswith("ins") && Operands.size() == 1 &&
2030      (Name == "insb" || Name == "insw" || Name == "insl" ||
2031       Name == "insd" )) {
2032    if (isParsingIntelSyntax()) {
2033      Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2034      Operands.push_back(DefaultMemDIOperand(NameLoc));
2035    } else {
2036      Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2037      Operands.push_back(DefaultMemDIOperand(NameLoc));
2038    }
2039  }
2040
2041  // Append default arguments to "outs[bwld]"
2042  if (Name.startswith("outs") && Operands.size() == 1 &&
2043      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2044       Name == "outsd" )) {
2045    if (isParsingIntelSyntax()) {
2046      Operands.push_back(DefaultMemSIOperand(NameLoc));
2047      Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2048    } else {
2049      Operands.push_back(DefaultMemSIOperand(NameLoc));
2050      Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2051    }
2052  }
2053
2054  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2055  // values of $SIREG according to the mode. It would be nice if this
2056  // could be achieved with InstAlias in the tables.
2057  if (Name.startswith("lods") && Operands.size() == 1 &&
2058      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2059       Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2060    Operands.push_back(DefaultMemSIOperand(NameLoc));
2061
2062  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2063  // values of $DIREG according to the mode. It would be nice if this
2064  // could be achieved with InstAlias in the tables.
2065  if (Name.startswith("stos") && Operands.size() == 1 &&
2066      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2067       Name == "stosl" || Name == "stosd" || Name == "stosq"))
2068    Operands.push_back(DefaultMemDIOperand(NameLoc));
2069
2070  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2071  // values of $DIREG according to the mode. It would be nice if this
2072  // could be achieved with InstAlias in the tables.
2073  if (Name.startswith("scas") && Operands.size() == 1 &&
2074      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2075       Name == "scasl" || Name == "scasd" || Name == "scasq"))
2076    Operands.push_back(DefaultMemDIOperand(NameLoc));
2077
2078  // Add default SI and DI operands to "cmps[bwlq]".
2079  if (Name.startswith("cmps") &&
2080      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2081       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2082    if (Operands.size() == 1) {
2083      if (isParsingIntelSyntax()) {
2084        Operands.push_back(DefaultMemSIOperand(NameLoc));
2085        Operands.push_back(DefaultMemDIOperand(NameLoc));
2086      } else {
2087        Operands.push_back(DefaultMemDIOperand(NameLoc));
2088        Operands.push_back(DefaultMemSIOperand(NameLoc));
2089      }
2090    } else if (Operands.size() == 3) {
2091      X86Operand &Op = (X86Operand &)*Operands[1];
2092      X86Operand &Op2 = (X86Operand &)*Operands[2];
2093      if (!doSrcDstMatch(Op, Op2))
2094        return Error(Op.getStartLoc(),
2095                     "mismatching source and destination index registers");
2096    }
2097  }
2098
2099  // Add default SI and DI operands to "movs[bwlq]".
2100  if ((Name.startswith("movs") &&
2101      (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2102       Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2103      (Name.startswith("smov") &&
2104      (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2105       Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2106    if (Operands.size() == 1) {
2107      if (Name == "movsd")
2108        Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2109      if (isParsingIntelSyntax()) {
2110        Operands.push_back(DefaultMemDIOperand(NameLoc));
2111        Operands.push_back(DefaultMemSIOperand(NameLoc));
2112      } else {
2113        Operands.push_back(DefaultMemSIOperand(NameLoc));
2114        Operands.push_back(DefaultMemDIOperand(NameLoc));
2115      }
2116    } else if (Operands.size() == 3) {
2117      X86Operand &Op = (X86Operand &)*Operands[1];
2118      X86Operand &Op2 = (X86Operand &)*Operands[2];
2119      if (!doSrcDstMatch(Op, Op2))
2120        return Error(Op.getStartLoc(),
2121                     "mismatching source and destination index registers");
2122    }
2123  }
2124
2125  // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
2126  // "shift <op>".
2127  if ((Name.startswith("shr") || Name.startswith("sar") ||
2128       Name.startswith("shl") || Name.startswith("sal") ||
2129       Name.startswith("rcl") || Name.startswith("rcr") ||
2130       Name.startswith("rol") || Name.startswith("ror")) &&
2131      Operands.size() == 3) {
2132    if (isParsingIntelSyntax()) {
2133      // Intel syntax
2134      X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2135      if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2136          cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2137        Operands.pop_back();
2138    } else {
2139      X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2140      if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2141          cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2142        Operands.erase(Operands.begin() + 1);
2143    }
2144  }
2145
2146  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
2147  // instalias with an immediate operand yet.
2148  if (Name == "int" && Operands.size() == 2) {
2149    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2150    if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2151        cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2152      Operands.erase(Operands.begin() + 1);
2153      static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2154    }
2155  }
2156
2157  return false;
2158}
2159
2160static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2161                            bool isCmp) {
2162  MCInst TmpInst;
2163  TmpInst.setOpcode(Opcode);
2164  if (!isCmp)
2165    TmpInst.addOperand(MCOperand::CreateReg(Reg));
2166  TmpInst.addOperand(MCOperand::CreateReg(Reg));
2167  TmpInst.addOperand(Inst.getOperand(0));
2168  Inst = TmpInst;
2169  return true;
2170}
2171
2172static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2173                                bool isCmp = false) {
2174  if (!Inst.getOperand(0).isImm() ||
2175      !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2176    return false;
2177
2178  return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
2179}
2180
2181static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2182                                bool isCmp = false) {
2183  if (!Inst.getOperand(0).isImm() ||
2184      !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2185    return false;
2186
2187  return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
2188}
2189
2190static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2191                                bool isCmp = false) {
2192  if (!Inst.getOperand(0).isImm() ||
2193      !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2194    return false;
2195
2196  return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2197}
2198
2199bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2200  switch (Inst.getOpcode()) {
2201  default: return false;
2202  case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2203  case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2204  case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2205  case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2206  case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2207  case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2208  case X86::OR16i16:  return convert16i16to16ri8(Inst, X86::OR16ri8);
2209  case X86::OR32i32:  return convert32i32to32ri8(Inst, X86::OR32ri8);
2210  case X86::OR64i32:  return convert64i32to64ri8(Inst, X86::OR64ri8);
2211  case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2212  case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2213  case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2214  case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2215  case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2216  case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2217  case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2218  case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2219  case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2220  case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2221  case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2222  case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2223  case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2224  case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2225  case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2226  case X86::VMOVAPDrr:
2227  case X86::VMOVAPDYrr:
2228  case X86::VMOVAPSrr:
2229  case X86::VMOVAPSYrr:
2230  case X86::VMOVDQArr:
2231  case X86::VMOVDQAYrr:
2232  case X86::VMOVDQUrr:
2233  case X86::VMOVDQUYrr:
2234  case X86::VMOVUPDrr:
2235  case X86::VMOVUPDYrr:
2236  case X86::VMOVUPSrr:
2237  case X86::VMOVUPSYrr: {
2238    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2239        !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2240      return false;
2241
2242    unsigned NewOpc;
2243    switch (Inst.getOpcode()) {
2244    default: llvm_unreachable("Invalid opcode");
2245    case X86::VMOVAPDrr:  NewOpc = X86::VMOVAPDrr_REV;  break;
2246    case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2247    case X86::VMOVAPSrr:  NewOpc = X86::VMOVAPSrr_REV;  break;
2248    case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2249    case X86::VMOVDQArr:  NewOpc = X86::VMOVDQArr_REV;  break;
2250    case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2251    case X86::VMOVDQUrr:  NewOpc = X86::VMOVDQUrr_REV;  break;
2252    case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2253    case X86::VMOVUPDrr:  NewOpc = X86::VMOVUPDrr_REV;  break;
2254    case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2255    case X86::VMOVUPSrr:  NewOpc = X86::VMOVUPSrr_REV;  break;
2256    case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2257    }
2258    Inst.setOpcode(NewOpc);
2259    return true;
2260  }
2261  case X86::VMOVSDrr:
2262  case X86::VMOVSSrr: {
2263    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2264        !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2265      return false;
2266    unsigned NewOpc;
2267    switch (Inst.getOpcode()) {
2268    default: llvm_unreachable("Invalid opcode");
2269    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV;   break;
2270    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV;   break;
2271    }
2272    Inst.setOpcode(NewOpc);
2273    return true;
2274  }
2275  }
2276}
2277
/// Forward declaration; the definition is not in this part of the file.
/// MatchAndEmitInstruction() calls it with one feature bit at a time to build
/// its "instruction requires:" diagnostic.
static const char *getSubtargetFeatureName(unsigned Val);
2279
2280void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2281                                   MCStreamer &Out) {
2282  Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2283                                         Out);
2284  Out.EmitInstruction(Inst, STI);
2285}
2286
2287bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2288                                           OperandVector &Operands,
2289                                           MCStreamer &Out, unsigned &ErrorInfo,
2290                                           bool MatchingInlineAsm) {
2291  assert(!Operands.empty() && "Unexpect empty operand list!");
2292  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2293  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2294  ArrayRef<SMRange> EmptyRanges = None;
2295
2296  // First, handle aliases that expand to multiple instructions.
2297  // FIXME: This should be replaced with a real .td file alias mechanism.
2298  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2299  // call.
2300  if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
2301      Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
2302      Op.getToken() == "finit" || Op.getToken() == "fsave" ||
2303      Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
2304    MCInst Inst;
2305    Inst.setOpcode(X86::WAIT);
2306    Inst.setLoc(IDLoc);
2307    if (!MatchingInlineAsm)
2308      EmitInstruction(Inst, Operands, Out);
2309
2310    const char *Repl = StringSwitch<const char *>(Op.getToken())
2311                           .Case("finit", "fninit")
2312                           .Case("fsave", "fnsave")
2313                           .Case("fstcw", "fnstcw")
2314                           .Case("fstcww", "fnstcw")
2315                           .Case("fstenv", "fnstenv")
2316                           .Case("fstsw", "fnstsw")
2317                           .Case("fstsww", "fnstsw")
2318                           .Case("fclex", "fnclex")
2319                           .Default(nullptr);
2320    assert(Repl && "Unknown wait-prefixed instruction");
2321    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2322  }
2323
2324  bool WasOriginallyInvalidOperand = false;
2325  MCInst Inst;
2326
2327  // First, try a direct match.
2328  switch (MatchInstructionImpl(Operands, Inst,
2329                               ErrorInfo, MatchingInlineAsm,
2330                               isParsingIntelSyntax())) {
2331  default: break;
2332  case Match_Success:
2333    // Some instructions need post-processing to, for example, tweak which
2334    // encoding is selected. Loop on it while changes happen so the
2335    // individual transformations can chain off each other.
2336    if (!MatchingInlineAsm)
2337      while (processInstruction(Inst, Operands))
2338        ;
2339
2340    Inst.setLoc(IDLoc);
2341    if (!MatchingInlineAsm)
2342      EmitInstruction(Inst, Operands, Out);
2343    Opcode = Inst.getOpcode();
2344    return false;
2345  case Match_MissingFeature: {
2346    assert(ErrorInfo && "Unknown missing feature!");
2347    // Special case the error message for the very common case where only
2348    // a single subtarget feature is missing.
2349    std::string Msg = "instruction requires:";
2350    unsigned Mask = 1;
2351    for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2352      if (ErrorInfo & Mask) {
2353        Msg += " ";
2354        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2355      }
2356      Mask <<= 1;
2357    }
2358    return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2359  }
2360  case Match_InvalidOperand:
2361    WasOriginallyInvalidOperand = true;
2362    break;
2363  case Match_MnemonicFail:
2364    break;
2365  }
2366
2367  // FIXME: Ideally, we would only attempt suffix matches for things which are
2368  // valid prefixes, and we could just infer the right unambiguous
2369  // type. However, that requires substantially more matcher support than the
2370  // following hack.
2371
2372  // Change the operand to point to a temporary token.
2373  StringRef Base = Op.getToken();
2374  SmallString<16> Tmp;
2375  Tmp += Base;
2376  Tmp += ' ';
2377  Op.setTokenValue(Tmp.str());
2378
2379  // If this instruction starts with an 'f', then it is a floating point stack
2380  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
2381  // 80-bit floating point, which use the suffixes s,l,t respectively.
2382  //
2383  // Otherwise, we assume that this may be an integer instruction, which comes
2384  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2385  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2386
2387  // Check for the various suffix matches.
2388  Tmp[Base.size()] = Suffixes[0];
2389  unsigned ErrorInfoIgnore;
2390  unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2391  unsigned Match1, Match2, Match3, Match4;
2392
2393  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2394                                MatchingInlineAsm, isParsingIntelSyntax());
2395  // If this returned as a missing feature failure, remember that.
2396  if (Match1 == Match_MissingFeature)
2397    ErrorInfoMissingFeature = ErrorInfoIgnore;
2398  Tmp[Base.size()] = Suffixes[1];
2399  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2400                                MatchingInlineAsm, isParsingIntelSyntax());
2401  // If this returned as a missing feature failure, remember that.
2402  if (Match2 == Match_MissingFeature)
2403    ErrorInfoMissingFeature = ErrorInfoIgnore;
2404  Tmp[Base.size()] = Suffixes[2];
2405  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2406                                MatchingInlineAsm, isParsingIntelSyntax());
2407  // If this returned as a missing feature failure, remember that.
2408  if (Match3 == Match_MissingFeature)
2409    ErrorInfoMissingFeature = ErrorInfoIgnore;
2410  Tmp[Base.size()] = Suffixes[3];
2411  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2412                                MatchingInlineAsm, isParsingIntelSyntax());
2413  // If this returned as a missing feature failure, remember that.
2414  if (Match4 == Match_MissingFeature)
2415    ErrorInfoMissingFeature = ErrorInfoIgnore;
2416
2417  // Restore the old token.
2418  Op.setTokenValue(Base);
2419
2420  // If exactly one matched, then we treat that as a successful match (and the
2421  // instruction will already have been filled in correctly, since the failing
2422  // matches won't have modified it).
2423  unsigned NumSuccessfulMatches =
2424    (Match1 == Match_Success) + (Match2 == Match_Success) +
2425    (Match3 == Match_Success) + (Match4 == Match_Success);
2426  if (NumSuccessfulMatches == 1) {
2427    Inst.setLoc(IDLoc);
2428    if (!MatchingInlineAsm)
2429      EmitInstruction(Inst, Operands, Out);
2430    Opcode = Inst.getOpcode();
2431    return false;
2432  }
2433
2434  // Otherwise, the match failed, try to produce a decent error message.
2435
2436  // If we had multiple suffix matches, then identify this as an ambiguous
2437  // match.
2438  if (NumSuccessfulMatches > 1) {
2439    char MatchChars[4];
2440    unsigned NumMatches = 0;
2441    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2442    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2443    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2444    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2445
2446    SmallString<126> Msg;
2447    raw_svector_ostream OS(Msg);
2448    OS << "ambiguous instructions require an explicit suffix (could be ";
2449    for (unsigned i = 0; i != NumMatches; ++i) {
2450      if (i != 0)
2451        OS << ", ";
2452      if (i + 1 == NumMatches)
2453        OS << "or ";
2454      OS << "'" << Base << MatchChars[i] << "'";
2455    }
2456    OS << ")";
2457    Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2458    return true;
2459  }
2460
2461  // Okay, we know that none of the variants matched successfully.
2462
2463  // If all of the instructions reported an invalid mnemonic, then the original
2464  // mnemonic was invalid.
2465  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2466      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2467    if (!WasOriginallyInvalidOperand) {
2468      ArrayRef<SMRange> Ranges =
2469          MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2470      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2471                   Ranges, MatchingInlineAsm);
2472    }
2473
2474    // Recover location info for the operand if we know which was the problem.
2475    if (ErrorInfo != ~0U) {
2476      if (ErrorInfo >= Operands.size())
2477        return Error(IDLoc, "too few operands for instruction",
2478                     EmptyRanges, MatchingInlineAsm);
2479
2480      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2481      if (Operand.getStartLoc().isValid()) {
2482        SMRange OperandRange = Operand.getLocRange();
2483        return Error(Operand.getStartLoc(), "invalid operand for instruction",
2484                     OperandRange, MatchingInlineAsm);
2485      }
2486    }
2487
2488    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2489                 MatchingInlineAsm);
2490  }
2491
2492  // If one instruction matched with a missing feature, report this as a
2493  // missing feature.
2494  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2495      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2496    std::string Msg = "instruction requires:";
2497    unsigned Mask = 1;
2498    for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2499      if (ErrorInfoMissingFeature & Mask) {
2500        Msg += " ";
2501        Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2502      }
2503      Mask <<= 1;
2504    }
2505    return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2506  }
2507
2508  // If one instruction matched with an invalid operand, report this as an
2509  // operand failure.
2510  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2511      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2512    Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2513          MatchingInlineAsm);
2514    return true;
2515  }
2516
2517  // If all of these were an outright failure, report it in a useless way.
2518  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2519        EmptyRanges, MatchingInlineAsm);
2520  return true;
2521}
2522
2523
2524bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2525  StringRef IDVal = DirectiveID.getIdentifier();
2526  if (IDVal == ".word")
2527    return ParseDirectiveWord(2, DirectiveID.getLoc());
2528  else if (IDVal.startswith(".code"))
2529    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2530  else if (IDVal.startswith(".att_syntax")) {
2531    getParser().setAssemblerDialect(0);
2532    return false;
2533  } else if (IDVal.startswith(".intel_syntax")) {
2534    getParser().setAssemblerDialect(1);
2535    if (getLexer().isNot(AsmToken::EndOfStatement)) {
2536      // FIXME: Handle noprefix
2537      if (Parser.getTok().getString() == "noprefix")
2538        Parser.Lex();
2539    }
2540    return false;
2541  }
2542  return true;
2543}
2544
2545/// ParseDirectiveWord
2546///  ::= .word [ expression (, expression)* ]
2547bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2548  if (getLexer().isNot(AsmToken::EndOfStatement)) {
2549    for (;;) {
2550      const MCExpr *Value;
2551      if (getParser().parseExpression(Value))
2552        return false;
2553
2554      getParser().getStreamer().EmitValue(Value, Size);
2555
2556      if (getLexer().is(AsmToken::EndOfStatement))
2557        break;
2558
2559      // FIXME: Improve diagnostic.
2560      if (getLexer().isNot(AsmToken::Comma)) {
2561        Error(L, "unexpected token in directive");
2562        return false;
2563      }
2564      Parser.Lex();
2565    }
2566  }
2567
2568  Parser.Lex();
2569  return false;
2570}
2571
2572/// ParseDirectiveCode
2573///  ::= .code16 | .code32 | .code64
2574bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2575  if (IDVal == ".code16") {
2576    Parser.Lex();
2577    if (!is16BitMode()) {
2578      SwitchMode(X86::Mode16Bit);
2579      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2580    }
2581  } else if (IDVal == ".code32") {
2582    Parser.Lex();
2583    if (!is32BitMode()) {
2584      SwitchMode(X86::Mode32Bit);
2585      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2586    }
2587  } else if (IDVal == ".code64") {
2588    Parser.Lex();
2589    if (!is64BitMode()) {
2590      SwitchMode(X86::Mode64Bit);
2591      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2592    }
2593  } else {
2594    Error(L, "unknown directive " + IDVal);
2595    return false;
2596  }
2597
2598  return false;
2599}
2600
2601// Force static initialization.
2602extern "C" void LLVMInitializeX86AsmParser() {
2603  RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2604  RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2605}
2606
2607#define GET_REGISTER_MATCHER
2608#define GET_MATCHER_IMPLEMENTATION
2609#define GET_SUBTARGET_FEATURE_NAME
2610#include "X86GenAsmMatcher.inc"
2611