ARMAsmParser.cpp revision 1d6a26507bfd75758f5c8a29bccf577784ead751
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "ARMAddressingModes.h"
12#include "ARMSubtarget.h"
13#include "llvm/MC/MCParser/MCAsmLexer.h"
14#include "llvm/MC/MCParser/MCAsmParser.h"
15#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
16#include "llvm/MC/MCContext.h"
17#include "llvm/MC/MCStreamer.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/Target/TargetRegistry.h"
21#include "llvm/Target/TargetAsmParser.h"
22#include "llvm/Support/SourceMgr.h"
23#include "llvm/Support/raw_ostream.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringSwitch.h"
26#include "llvm/ADT/Twine.h"
27using namespace llvm;
28
// The shift types for register controlled shifts in arm memory addressing.
// ParseShift() maps the assembly mnemonics (all-lowercase or all-uppercase
// spelling) onto these values.
enum ShiftType {
  Lsl, // "lsl" / "LSL"
  Lsr, // "lsr" / "LSR"
  Asr, // "asr" / "ASR"
  Ror, // "ror" / "ROR"
  Rrx  // "rrx" / "RRX"; the only shift parsed without a '#amount'
};
37
38namespace {
39  struct ARMOperand;
40
/// ARMAsmParser - Target-specific assembly parser for ARM.  It turns the
/// tokens produced by the generic MCAsmParser into ARMOperand objects and
/// hands them to the matcher pulled in from ARMGenAsmMatcher.inc.
class ARMAsmParser : public TargetAsmParser {
  // The generic parser driving this target parser; owns the lexer.
  MCAsmParser &Parser;
  // Target machine; used in the constructor to query the ARMSubtarget.
  TargetMachine &TM;

private:
  /// Accessor for the generic parser.
  MCAsmParser &getParser() const { return Parser; }

  /// Accessor for the lexer owned by the generic parser.
  MCAsmLexer &getLexer() const { return Parser.getLexer(); }

  /// Forward a warning at location L to the generic parser.
  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }

  /// Forward an error at location L to the generic parser and return its
  /// result, so callers can write "return Error(...)".
  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }

  // Operand-level parsers.  TryParseRegister returns -1 when the current
  // identifier is not a register; the ARMOperand* parsers return null on
  // failure.
  int TryParseRegister();
  ARMOperand *TryParseRegisterWithWriteBack();
  ARMOperand *ParseRegisterList();
  ARMOperand *ParseMemory();

  /// Parse the offset part of a memory operand (after "[Rn," or "[Rn],").
  /// Results are delivered through the reference parameters; returns false
  /// on success and true on error.
  bool ParseMemoryOffsetReg(bool &Negative,
                            bool &OffsetRegShifted,
                            enum ShiftType &ShiftType,
                            const MCExpr *&ShiftAmount,
                            const MCExpr *&Offset,
                            bool &OffsetIsReg,
                            int &OffsetRegNum,
                            SMLoc &E);

  /// Parse a shift specifier; returns false on success and true on error.
  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);

  /// Parse a single instruction operand; returns null on failure.
  ARMOperand *ParseOperand();

  // Directive handlers, dispatched from ParseDirective().  Each returns
  // false on success and true on error.
  bool ParseDirectiveWord(unsigned Size, SMLoc L);

  bool ParseDirectiveThumb(SMLoc L);

  bool ParseDirectiveThumbFunc(SMLoc L);

  bool ParseDirectiveCode(SMLoc L);

  bool ParseDirectiveSyntax(SMLoc L);

  /// Match the parsed operands against the instruction tables and emit the
  /// resulting MCInst to Out; returns false on success and true on error.
  bool MatchAndEmitInstruction(SMLoc IDLoc,
                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                               MCStreamer &Out);

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "ARMGenAsmMatcher.inc"

  /// }


public:
  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
      // Initialize the set of available features.
      setAvailableFeatures(ComputeAvailableFeatures(
          &TM.getSubtarget<ARMSubtarget>()));
    }

  /// Parse the mnemonic Name (located at NameLoc) and the operands that
  /// follow it, appending the parsed operands to Operands.  Returns false on
  /// success and true on error.
  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  /// Handle an ARM-specific directive; returns true when the directive is
  /// not one this parser knows or when parsing it failed.
  virtual bool ParseDirective(AsmToken DirectiveID);
};
108} // end anonymous namespace
109
110namespace {
111
112/// ARMOperand - Instances of this class represent a parsed ARM machine
113/// instruction.
114struct ARMOperand : public MCParsedAsmOperand {
115public:
116  enum KindTy {
117    CondCode,
118    Immediate,
119    Memory,
120    Register,
121    Token
122  } Kind;
123
124  SMLoc StartLoc, EndLoc;
125
126  union {
127    struct {
128      ARMCC::CondCodes Val;
129    } CC;
130
131    struct {
132      const char *Data;
133      unsigned Length;
134    } Tok;
135
136    struct {
137      unsigned RegNum;
138      bool Writeback;
139    } Reg;
140
141    struct {
142      const MCExpr *Val;
143    } Imm;
144
145    // This is for all forms of ARM address expressions
146    struct {
147      unsigned BaseRegNum;
148      unsigned OffsetRegNum; // used when OffsetIsReg is true
149      const MCExpr *Offset; // used when OffsetIsReg is false
150      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
151      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
152      unsigned
153        OffsetRegShifted : 1, // only used when OffsetIsReg is true
154        Preindexed : 1,
155        Postindexed : 1,
156        OffsetIsReg : 1,
157        Negative : 1, // only used when OffsetIsReg is true
158        Writeback : 1;
159    } Mem;
160
161  };
162
163  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
164    Kind = o.Kind;
165    StartLoc = o.StartLoc;
166    EndLoc = o.EndLoc;
167    switch (Kind) {
168    case CondCode:
169      CC = o.CC;
170      break;
171    case Token:
172      Tok = o.Tok;
173      break;
174    case Register:
175      Reg = o.Reg;
176      break;
177    case Immediate:
178      Imm = o.Imm;
179      break;
180    case Memory:
181      Mem = o.Mem;
182      break;
183    }
184  }
185
186  /// getStartLoc - Get the location of the first token of this operand.
187  SMLoc getStartLoc() const { return StartLoc; }
188  /// getEndLoc - Get the location of the last token of this operand.
189  SMLoc getEndLoc() const { return EndLoc; }
190
191  ARMCC::CondCodes getCondCode() const {
192    assert(Kind == CondCode && "Invalid access!");
193    return CC.Val;
194  }
195
196  StringRef getToken() const {
197    assert(Kind == Token && "Invalid access!");
198    return StringRef(Tok.Data, Tok.Length);
199  }
200
201  unsigned getReg() const {
202    assert(Kind == Register && "Invalid access!");
203    return Reg.RegNum;
204  }
205
206  const MCExpr *getImm() const {
207    assert(Kind == Immediate && "Invalid access!");
208    return Imm.Val;
209  }
210
211  bool isCondCode() const { return Kind == CondCode; }
212  bool isImm() const { return Kind == Immediate; }
213  bool isReg() const { return Kind == Register; }
214  bool isToken() const { return Kind == Token; }
215  bool isMemory() const { return Kind == Memory; }
216
217  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
218    // Add as immediates when possible.  Null MCExpr = 0.
219    if (Expr == 0)
220      Inst.addOperand(MCOperand::CreateImm(0));
221    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
222      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
223    else
224      Inst.addOperand(MCOperand::CreateExpr(Expr));
225  }
226
227  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
228    assert(N == 2 && "Invalid number of operands!");
229    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
230    // FIXME: What belongs here?
231    Inst.addOperand(MCOperand::CreateReg(0));
232  }
233
234  void addRegOperands(MCInst &Inst, unsigned N) const {
235    assert(N == 1 && "Invalid number of operands!");
236    Inst.addOperand(MCOperand::CreateReg(getReg()));
237  }
238
239  void addImmOperands(MCInst &Inst, unsigned N) const {
240    assert(N == 1 && "Invalid number of operands!");
241    addExpr(Inst, getImm());
242  }
243
244
245  bool isMemMode5() const {
246    if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted ||
247        Mem.Writeback || Mem.Negative)
248      return false;
249    // If there is an offset expression, make sure it's valid.
250    if (!Mem.Offset)
251      return true;
252    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
253    if (!CE)
254      return false;
255    // The offset must be a multiple of 4 in the range 0-1020.
256    int64_t Value = CE->getValue();
257    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
258  }
259
260  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
261    assert(N == 2 && isMemMode5() && "Invalid number of operands!");
262
263    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
264    assert(!Mem.OffsetIsReg && "invalid mode 5 operand");
265
266    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
267    // the difference?
268    if (Mem.Offset) {
269      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
270      assert(CE && "Non-constant mode 5 offset operand!");
271
272      // The MCInst offset operand doesn't include the low two bits (like
273      // the instruction encoding).
274      int64_t Offset = CE->getValue() / 4;
275      if (Offset >= 0)
276        Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
277                                                               Offset)));
278      else
279        Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
280                                                               -Offset)));
281    } else {
282      Inst.addOperand(MCOperand::CreateImm(0));
283    }
284  }
285
286  virtual void dump(raw_ostream &OS) const;
287
288  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
289    ARMOperand *Op = new ARMOperand(CondCode);
290    Op->CC.Val = CC;
291    Op->StartLoc = S;
292    Op->EndLoc = S;
293    return Op;
294  }
295
296  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
297    ARMOperand *Op = new ARMOperand(Token);
298    Op->Tok.Data = Str.data();
299    Op->Tok.Length = Str.size();
300    Op->StartLoc = S;
301    Op->EndLoc = S;
302    return Op;
303  }
304
305  static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S,
306                               SMLoc E) {
307    ARMOperand *Op = new ARMOperand(Register);
308    Op->Reg.RegNum = RegNum;
309    Op->Reg.Writeback = Writeback;
310    Op->StartLoc = S;
311    Op->EndLoc = E;
312    return Op;
313  }
314
315  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
316    ARMOperand *Op = new ARMOperand(Immediate);
317    Op->Imm.Val = Val;
318    Op->StartLoc = S;
319    Op->EndLoc = E;
320    return Op;
321  }
322
323  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
324                               const MCExpr *Offset, unsigned OffsetRegNum,
325                               bool OffsetRegShifted, enum ShiftType ShiftType,
326                               const MCExpr *ShiftAmount, bool Preindexed,
327                               bool Postindexed, bool Negative, bool Writeback,
328                               SMLoc S, SMLoc E) {
329    ARMOperand *Op = new ARMOperand(Memory);
330    Op->Mem.BaseRegNum = BaseRegNum;
331    Op->Mem.OffsetIsReg = OffsetIsReg;
332    Op->Mem.Offset = Offset;
333    Op->Mem.OffsetRegNum = OffsetRegNum;
334    Op->Mem.OffsetRegShifted = OffsetRegShifted;
335    Op->Mem.ShiftType = ShiftType;
336    Op->Mem.ShiftAmount = ShiftAmount;
337    Op->Mem.Preindexed = Preindexed;
338    Op->Mem.Postindexed = Postindexed;
339    Op->Mem.Negative = Negative;
340    Op->Mem.Writeback = Writeback;
341
342    Op->StartLoc = S;
343    Op->EndLoc = E;
344    return Op;
345  }
346
347private:
348  ARMOperand(KindTy K) : Kind(K) {}
349};
350
351} // end anonymous namespace.
352
353void ARMOperand::dump(raw_ostream &OS) const {
354  switch (Kind) {
355  case CondCode:
356    OS << ARMCondCodeToString(getCondCode());
357    break;
358  case Immediate:
359    getImm()->print(OS);
360    break;
361  case Memory:
362    OS << "<memory>";
363    break;
364  case Register:
365    OS << "<register " << getReg() << ">";
366    break;
367  case Token:
368    OS << "'" << getToken() << "'";
369    break;
370  }
371}
372
373/// @name Auto-generated Match Functions
374/// {
375
// Forward declaration only; no definition is visible in this part of the file.
// NOTE(review): presumably supplied by ARMGenAsmMatcher.inc, which is included
// at the end of the file with GET_REGISTER_MATCHER defined -- confirm.
static unsigned MatchRegisterName(StringRef Name);
377
378/// }
379
380/// Try to parse a register name.  The token must be an Identifier when called,
381/// and if it is a register name the token is eaten and the register number is
382/// returned.  Otherwise return -1.
383///
384int ARMAsmParser::TryParseRegister() {
385  const AsmToken &Tok = Parser.getTok();
386  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
387
388  // FIXME: Validate register for the current architecture; we have to do
389  // validation later, so maybe there is no need for this here.
390  int RegNum = MatchRegisterName(Tok.getString());
391  if (RegNum == -1)
392    return -1;
393  Parser.Lex(); // Eat identifier token.
394  return RegNum;
395}
396
397
398/// Try to parse a register name.  The token must be an Identifier when called,
399/// and if it is a register name the token is eaten and the register number is
400/// returned.  Otherwise return -1.
401///
402/// TODO this is likely to change to allow different register types and or to
403/// parse for a specific register type.
404ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() {
405  SMLoc S = Parser.getTok().getLoc();
406  int RegNo = TryParseRegister();
407  if (RegNo == -1) return 0;
408
409  SMLoc E = Parser.getTok().getLoc();
410
411  bool Writeback = false;
412  const AsmToken &ExclaimTok = Parser.getTok();
413  if (ExclaimTok.is(AsmToken::Exclaim)) {
414    E = ExclaimTok.getLoc();
415    Writeback = true;
416    Parser.Lex(); // Eat exclaim token
417  }
418
419  return ARMOperand::CreateReg(RegNo, Writeback, S, E);
420}
421
422/// Parse a register list, return it if successful else return null.  The first
423/// token must be a '{' when called.
424ARMOperand *ARMAsmParser::ParseRegisterList() {
425  SMLoc S, E;
426  assert(Parser.getTok().is(AsmToken::LCurly) &&
427         "Token is not an Left Curly Brace");
428  S = Parser.getTok().getLoc();
429  Parser.Lex(); // Eat left curly brace token.
430
431  const AsmToken &RegTok = Parser.getTok();
432  SMLoc RegLoc = RegTok.getLoc();
433  if (RegTok.isNot(AsmToken::Identifier)) {
434    Error(RegLoc, "register expected");
435    return 0;
436  }
437  int RegNum = TryParseRegister();
438  if (RegNum == -1) {
439    Error(RegLoc, "register expected");
440    return 0;
441  }
442
443  unsigned RegList = 1 << RegNum;
444
445  int HighRegNum = RegNum;
446  // TODO ranges like "{Rn-Rm}"
447  while (Parser.getTok().is(AsmToken::Comma)) {
448    Parser.Lex(); // Eat comma token.
449
450    const AsmToken &RegTok = Parser.getTok();
451    SMLoc RegLoc = RegTok.getLoc();
452    if (RegTok.isNot(AsmToken::Identifier)) {
453      Error(RegLoc, "register expected");
454      return 0;
455    }
456    int RegNum = TryParseRegister();
457    if (RegNum == -1) {
458      Error(RegLoc, "register expected");
459      return 0;
460    }
461
462    if (RegList & (1 << RegNum))
463      Warning(RegLoc, "register duplicated in register list");
464    else if (RegNum <= HighRegNum)
465      Warning(RegLoc, "register not in ascending order in register list");
466    RegList |= 1 << RegNum;
467    HighRegNum = RegNum;
468  }
469  const AsmToken &RCurlyTok = Parser.getTok();
470  if (RCurlyTok.isNot(AsmToken::RCurly)) {
471    Error(RCurlyTok.getLoc(), "'}' expected");
472    return 0;
473  }
474  E = RCurlyTok.getLoc();
475  Parser.Lex(); // Eat left curly brace token.
476
477  // FIXME: Need to return an operand!
478  Error(E, "FIXME: register list parsing not implemented");
479  return 0;
480}
481
482/// Parse an arm memory expression, return false if successful else return true
483/// or an error.  The first token must be a '[' when called.
484/// TODO Only preindexing and postindexing addressing are started, unindexed
485/// with option, etc are still to do.
486ARMOperand *ARMAsmParser::ParseMemory() {
487  SMLoc S, E;
488  assert(Parser.getTok().is(AsmToken::LBrac) &&
489         "Token is not an Left Bracket");
490  S = Parser.getTok().getLoc();
491  Parser.Lex(); // Eat left bracket token.
492
493  const AsmToken &BaseRegTok = Parser.getTok();
494  if (BaseRegTok.isNot(AsmToken::Identifier)) {
495    Error(BaseRegTok.getLoc(), "register expected");
496    return 0;
497  }
498  int BaseRegNum = TryParseRegister();
499  if (BaseRegNum == -1) {
500    Error(BaseRegTok.getLoc(), "register expected");
501    return 0;
502  }
503
504  bool Preindexed = false;
505  bool Postindexed = false;
506  bool OffsetIsReg = false;
507  bool Negative = false;
508  bool Writeback = false;
509
510  // First look for preindexed address forms, that is after the "[Rn" we now
511  // have to see if the next token is a comma.
512  const AsmToken &Tok = Parser.getTok();
513  if (Tok.is(AsmToken::Comma)) {
514    Preindexed = true;
515    Parser.Lex(); // Eat comma token.
516    int OffsetRegNum;
517    bool OffsetRegShifted;
518    enum ShiftType ShiftType;
519    const MCExpr *ShiftAmount;
520    const MCExpr *Offset;
521    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
522                             Offset, OffsetIsReg, OffsetRegNum, E))
523      return 0;
524    const AsmToken &RBracTok = Parser.getTok();
525    if (RBracTok.isNot(AsmToken::RBrac)) {
526      Error(RBracTok.getLoc(), "']' expected");
527      return 0;
528    }
529    E = RBracTok.getLoc();
530    Parser.Lex(); // Eat right bracket token.
531
532    const AsmToken &ExclaimTok = Parser.getTok();
533    if (ExclaimTok.is(AsmToken::Exclaim)) {
534      E = ExclaimTok.getLoc();
535      Writeback = true;
536      Parser.Lex(); // Eat exclaim token
537    }
538    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
539                                 OffsetRegShifted, ShiftType, ShiftAmount,
540                                 Preindexed, Postindexed, Negative, Writeback,
541                                 S, E);
542  }
543  // The "[Rn" we have so far was not followed by a comma.
544  else if (Tok.is(AsmToken::RBrac)) {
545    // If there's anything other than the right brace, this is a post indexing
546    // addressing form.
547    E = Tok.getLoc();
548    Parser.Lex(); // Eat right bracket token.
549
550    int OffsetRegNum = 0;
551    bool OffsetRegShifted = false;
552    enum ShiftType ShiftType;
553    const MCExpr *ShiftAmount;
554    const MCExpr *Offset = 0;
555
556    const AsmToken &NextTok = Parser.getTok();
557    if (NextTok.isNot(AsmToken::EndOfStatement)) {
558      Postindexed = true;
559      Writeback = true;
560      if (NextTok.isNot(AsmToken::Comma)) {
561        Error(NextTok.getLoc(), "',' expected");
562        return 0;
563      }
564      Parser.Lex(); // Eat comma token.
565      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
566                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
567                               E))
568        return 0;
569    }
570
571    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
572                                 OffsetRegShifted, ShiftType, ShiftAmount,
573                                 Preindexed, Postindexed, Negative, Writeback,
574                                 S, E);
575  }
576
577  return 0;
578}
579
580/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
581/// we will parse the following (were +/- means that a plus or minus is
582/// optional):
583///   +/-Rm
584///   +/-Rm, shift
585///   #offset
586/// we return false on success or an error otherwise.
587bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
588                                        bool &OffsetRegShifted,
589                                        enum ShiftType &ShiftType,
590                                        const MCExpr *&ShiftAmount,
591                                        const MCExpr *&Offset,
592                                        bool &OffsetIsReg,
593                                        int &OffsetRegNum,
594                                        SMLoc &E) {
595  Negative = false;
596  OffsetRegShifted = false;
597  OffsetIsReg = false;
598  OffsetRegNum = -1;
599  const AsmToken &NextTok = Parser.getTok();
600  E = NextTok.getLoc();
601  if (NextTok.is(AsmToken::Plus))
602    Parser.Lex(); // Eat plus token.
603  else if (NextTok.is(AsmToken::Minus)) {
604    Negative = true;
605    Parser.Lex(); // Eat minus token
606  }
607  // See if there is a register following the "[Rn," or "[Rn]," we have so far.
608  const AsmToken &OffsetRegTok = Parser.getTok();
609  if (OffsetRegTok.is(AsmToken::Identifier)) {
610    SMLoc CurLoc = OffsetRegTok.getLoc();
611    OffsetRegNum = TryParseRegister();
612    if (OffsetRegNum != -1) {
613      OffsetIsReg = true;
614      E = CurLoc;
615    }
616  }
617
618  // If we parsed a register as the offset then their can be a shift after that
619  if (OffsetRegNum != -1) {
620    // Look for a comma then a shift
621    const AsmToken &Tok = Parser.getTok();
622    if (Tok.is(AsmToken::Comma)) {
623      Parser.Lex(); // Eat comma token.
624
625      const AsmToken &Tok = Parser.getTok();
626      if (ParseShift(ShiftType, ShiftAmount, E))
627        return Error(Tok.getLoc(), "shift expected");
628      OffsetRegShifted = true;
629    }
630  }
631  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
632    // Look for #offset following the "[Rn," or "[Rn],"
633    const AsmToken &HashTok = Parser.getTok();
634    if (HashTok.isNot(AsmToken::Hash))
635      return Error(HashTok.getLoc(), "'#' expected");
636
637    Parser.Lex(); // Eat hash token.
638
639    if (getParser().ParseExpression(Offset))
640     return true;
641    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
642  }
643  return false;
644}
645
646/// ParseShift as one of these two:
647///   ( lsl | lsr | asr | ror ) , # shift_amount
648///   rrx
649/// and returns true if it parses a shift otherwise it returns false.
650bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
651                              SMLoc &E) {
652  const AsmToken &Tok = Parser.getTok();
653  if (Tok.isNot(AsmToken::Identifier))
654    return true;
655  StringRef ShiftName = Tok.getString();
656  if (ShiftName == "lsl" || ShiftName == "LSL")
657    St = Lsl;
658  else if (ShiftName == "lsr" || ShiftName == "LSR")
659    St = Lsr;
660  else if (ShiftName == "asr" || ShiftName == "ASR")
661    St = Asr;
662  else if (ShiftName == "ror" || ShiftName == "ROR")
663    St = Ror;
664  else if (ShiftName == "rrx" || ShiftName == "RRX")
665    St = Rrx;
666  else
667    return true;
668  Parser.Lex(); // Eat shift type token.
669
670  // Rrx stands alone.
671  if (St == Rrx)
672    return false;
673
674  // Otherwise, there must be a '#' and a shift amount.
675  const AsmToken &HashTok = Parser.getTok();
676  if (HashTok.isNot(AsmToken::Hash))
677    return Error(HashTok.getLoc(), "'#' expected");
678  Parser.Lex(); // Eat hash token.
679
680  if (getParser().ParseExpression(ShiftAmount))
681    return true;
682
683  return false;
684}
685
686/// Parse a arm instruction operand.  For now this parses the operand regardless
687/// of the mnemonic.
688ARMOperand *ARMAsmParser::ParseOperand() {
689  SMLoc S, E;
690
691  switch (getLexer().getKind()) {
692  case AsmToken::Identifier:
693    if (ARMOperand *Op = TryParseRegisterWithWriteBack())
694      return Op;
695
696    // This was not a register so parse other operands that start with an
697    // identifier (like labels) as expressions and create them as immediates.
698    const MCExpr *IdVal;
699    S = Parser.getTok().getLoc();
700    if (getParser().ParseExpression(IdVal))
701      return 0;
702    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
703    return ARMOperand::CreateImm(IdVal, S, E);
704  case AsmToken::LBrac:
705    return ParseMemory();
706  case AsmToken::LCurly:
707    return ParseRegisterList();
708  case AsmToken::Hash:
709    // #42 -> immediate.
710    // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
711    S = Parser.getTok().getLoc();
712    Parser.Lex();
713    const MCExpr *ImmVal;
714    if (getParser().ParseExpression(ImmVal))
715      return 0;
716    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
717    return ARMOperand::CreateImm(ImmVal, S, E);
718  default:
719    Error(Parser.getTok().getLoc(), "unexpected token in operand");
720    return 0;
721  }
722}
723
724/// Parse an arm instruction mnemonic followed by its operands.
725bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
726                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
727  // Create the leading tokens for the mnemonic, split by '.' characters.
728  size_t Start = 0, Next = Name.find('.');
729  StringRef Head = Name.slice(Start, Next);
730
731  // Determine the predicate, if any.
732  //
733  // FIXME: We need a way to check whether a prefix supports predication,
734  // otherwise we will end up with an ambiguity for instructions that happen to
735  // end with a predicate name.
736  // FIXME: Likewise, some arithmetic instructions have an 's' prefix which
737  // indicates to update the condition codes. Those instructions have an
738  // additional immediate operand which encodes the prefix as reg0 or CPSR.
739  // Just checking for a suffix of 's' definitely creates ambiguities; e.g,
740  // the SMMLS instruction.
741  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
742    .Case("eq", ARMCC::EQ)
743    .Case("ne", ARMCC::NE)
744    .Case("hs", ARMCC::HS)
745    .Case("lo", ARMCC::LO)
746    .Case("mi", ARMCC::MI)
747    .Case("pl", ARMCC::PL)
748    .Case("vs", ARMCC::VS)
749    .Case("vc", ARMCC::VC)
750    .Case("hi", ARMCC::HI)
751    .Case("ls", ARMCC::LS)
752    .Case("ge", ARMCC::GE)
753    .Case("lt", ARMCC::LT)
754    .Case("gt", ARMCC::GT)
755    .Case("le", ARMCC::LE)
756    .Case("al", ARMCC::AL)
757    .Default(~0U);
758
759  if (CC == ~0U ||
760      (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) {
761    CC = ARMCC::AL;
762  } else {
763    Head = Head.slice(0, Head.size() - 2);
764  }
765
766  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
767  // FIXME: Should only add this operand for predicated instructions
768  Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc));
769
770  // Add the remaining tokens in the mnemonic.
771  while (Next != StringRef::npos) {
772    Start = Next;
773    Next = Name.find('.', Start + 1);
774    Head = Name.slice(Start, Next);
775
776    Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
777  }
778
779  // Read the remaining operands.
780  if (getLexer().isNot(AsmToken::EndOfStatement)) {
781    // Read the first operand.
782    if (ARMOperand *Op = ParseOperand())
783      Operands.push_back(Op);
784    else {
785      Parser.EatToEndOfStatement();
786      return true;
787    }
788
789    while (getLexer().is(AsmToken::Comma)) {
790      Parser.Lex();  // Eat the comma.
791
792      // Parse and remember the operand.
793      if (ARMOperand *Op = ParseOperand())
794        Operands.push_back(Op);
795      else {
796        Parser.EatToEndOfStatement();
797        return true;
798      }
799    }
800  }
801
802  if (getLexer().isNot(AsmToken::EndOfStatement)) {
803    Parser.EatToEndOfStatement();
804    return TokError("unexpected token in argument list");
805  }
806  Parser.Lex(); // Consume the EndOfStatement
807  return false;
808}
809
810bool ARMAsmParser::
811MatchAndEmitInstruction(SMLoc IDLoc,
812                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
813                        MCStreamer &Out) {
814  MCInst Inst;
815  unsigned ErrorInfo;
816  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
817  case Match_Success:
818    Out.EmitInstruction(Inst);
819    return false;
820
821  case Match_MissingFeature:
822    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
823    return true;
824  case Match_InvalidOperand: {
825    SMLoc ErrorLoc = IDLoc;
826    if (ErrorInfo != ~0U) {
827      if (ErrorInfo >= Operands.size())
828        return Error(IDLoc, "too few operands for instruction");
829
830      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
831      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
832    }
833
834    return Error(ErrorLoc, "invalid operand for instruction");
835  }
836  case Match_MnemonicFail:
837    return Error(IDLoc, "unrecognized instruction mnemonic");
838  }
839
840  llvm_unreachable("Implement any new match types added!");
841}
842
843
844
845/// ParseDirective parses the arm specific directives
846bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
847  StringRef IDVal = DirectiveID.getIdentifier();
848  if (IDVal == ".word")
849    return ParseDirectiveWord(4, DirectiveID.getLoc());
850  else if (IDVal == ".thumb")
851    return ParseDirectiveThumb(DirectiveID.getLoc());
852  else if (IDVal == ".thumb_func")
853    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
854  else if (IDVal == ".code")
855    return ParseDirectiveCode(DirectiveID.getLoc());
856  else if (IDVal == ".syntax")
857    return ParseDirectiveSyntax(DirectiveID.getLoc());
858  return true;
859}
860
861/// ParseDirectiveWord
862///  ::= .word [ expression (, expression)* ]
863bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
864  if (getLexer().isNot(AsmToken::EndOfStatement)) {
865    for (;;) {
866      const MCExpr *Value;
867      if (getParser().ParseExpression(Value))
868        return true;
869
870      getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
871
872      if (getLexer().is(AsmToken::EndOfStatement))
873        break;
874
875      // FIXME: Improve diagnostic.
876      if (getLexer().isNot(AsmToken::Comma))
877        return Error(L, "unexpected token in directive");
878      Parser.Lex();
879    }
880  }
881
882  Parser.Lex();
883  return false;
884}
885
886/// ParseDirectiveThumb
887///  ::= .thumb
888bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
889  if (getLexer().isNot(AsmToken::EndOfStatement))
890    return Error(L, "unexpected token in directive");
891  Parser.Lex();
892
893  // TODO: set thumb mode
894  // TODO: tell the MC streamer the mode
895  // getParser().getStreamer().Emit???();
896  return false;
897}
898
899/// ParseDirectiveThumbFunc
900///  ::= .thumbfunc symbol_name
901bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
902  const AsmToken &Tok = Parser.getTok();
903  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
904    return Error(L, "unexpected token in .thumb_func directive");
905  StringRef Name = Tok.getString();
906  Parser.Lex(); // Consume the identifier token.
907  if (getLexer().isNot(AsmToken::EndOfStatement))
908    return Error(L, "unexpected token in directive");
909  Parser.Lex();
910
911  // Mark symbol as a thumb symbol.
912  MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
913  getParser().getStreamer().EmitThumbFunc(Func);
914  return false;
915}
916
917/// ParseDirectiveSyntax
918///  ::= .syntax unified | divided
919bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
920  const AsmToken &Tok = Parser.getTok();
921  if (Tok.isNot(AsmToken::Identifier))
922    return Error(L, "unexpected token in .syntax directive");
923  StringRef Mode = Tok.getString();
924  if (Mode == "unified" || Mode == "UNIFIED")
925    Parser.Lex();
926  else if (Mode == "divided" || Mode == "DIVIDED")
927    Parser.Lex();
928  else
929    return Error(L, "unrecognized syntax mode in .syntax directive");
930
931  if (getLexer().isNot(AsmToken::EndOfStatement))
932    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
933  Parser.Lex();
934
935  // TODO tell the MC streamer the mode
936  // getParser().getStreamer().Emit???();
937  return false;
938}
939
940/// ParseDirectiveCode
941///  ::= .code 16 | 32
942bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
943  const AsmToken &Tok = Parser.getTok();
944  if (Tok.isNot(AsmToken::Integer))
945    return Error(L, "unexpected token in .code directive");
946  int64_t Val = Parser.getTok().getIntVal();
947  if (Val == 16)
948    Parser.Lex();
949  else if (Val == 32)
950    Parser.Lex();
951  else
952    return Error(L, "invalid operand to .code directive");
953
954  if (getLexer().isNot(AsmToken::EndOfStatement))
955    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
956  Parser.Lex();
957
958  if (Val == 16)
959    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
960  else
961    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
962
963  return false;
964}
965
966extern "C" void LLVMInitializeARMAsmLexer();
967
968/// Force static initialization.
969extern "C" void LLVMInitializeARMAsmParser() {
970  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
971  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
972  LLVMInitializeARMAsmLexer();
973}
974
975#define GET_REGISTER_MATCHER
976#define GET_MATCHER_IMPLEMENTATION
977#include "ARMGenAsmMatcher.inc"
978