ARMAsmParser.cpp revision 92b5a2eb1646b3c1173a5ff3c0073f24ed5ee6a4
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "ARMAddressingModes.h"
12#include "ARMSubtarget.h"
13#include "llvm/MC/MCParser/MCAsmLexer.h"
14#include "llvm/MC/MCParser/MCAsmParser.h"
15#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
16#include "llvm/MC/MCStreamer.h"
17#include "llvm/MC/MCExpr.h"
18#include "llvm/MC/MCInst.h"
19#include "llvm/Target/TargetRegistry.h"
20#include "llvm/Target/TargetAsmParser.h"
21#include "llvm/Support/SourceMgr.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/StringSwitch.h"
25#include "llvm/ADT/Twine.h"
26using namespace llvm;
27
// Shift operations that can be applied to the offset register of an ARM
// memory operand.  ParseShift() maps the mnemonics "lsl", "lsr", "asr", "ror"
// and "rrx" onto these enumerators.
enum ShiftType {
  Lsl = 0,
  Lsr = 1,
  Asr = 2,
  Ror = 3,
  Rrx = 4
};
36
37namespace {
38  struct ARMOperand;
39
40class ARMAsmParser : public TargetAsmParser {
41  MCAsmParser &Parser;
42  TargetMachine &TM;
43
44private:
45  MCAsmParser &getParser() const { return Parser; }
46
47  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
48
49  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
50
51  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
52
53  int TryParseRegister();
54  ARMOperand *TryParseRegisterWithWriteBack();
55  ARMOperand *ParseRegisterList();
56  ARMOperand *ParseMemory();
57
58  bool ParseMemoryOffsetReg(bool &Negative,
59                            bool &OffsetRegShifted,
60                            enum ShiftType &ShiftType,
61                            const MCExpr *&ShiftAmount,
62                            const MCExpr *&Offset,
63                            bool &OffsetIsReg,
64                            int &OffsetRegNum,
65                            SMLoc &E);
66
67  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
68
69  ARMOperand *ParseOperand();
70
71  bool ParseDirectiveWord(unsigned Size, SMLoc L);
72
73  bool ParseDirectiveThumb(SMLoc L);
74
75  bool ParseDirectiveThumbFunc(SMLoc L);
76
77  bool ParseDirectiveCode(SMLoc L);
78
79  bool ParseDirectiveSyntax(SMLoc L);
80
81  bool MatchAndEmitInstruction(SMLoc IDLoc,
82                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
83                               MCStreamer &Out);
84
85  /// @name Auto-generated Match Functions
86  /// {
87
88#define GET_ASSEMBLER_HEADER
89#include "ARMGenAsmMatcher.inc"
90
91  /// }
92
93
94public:
95  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
96    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
97      // Initialize the set of available features.
98      setAvailableFeatures(ComputeAvailableFeatures(
99          &TM.getSubtarget<ARMSubtarget>()));
100    }
101
102  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
103                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
104
105  virtual bool ParseDirective(AsmToken DirectiveID);
106};
107} // end anonymous namespace
108
109namespace {
110
111/// ARMOperand - Instances of this class represent a parsed ARM machine
112/// instruction.
113struct ARMOperand : public MCParsedAsmOperand {
114public:
115  enum KindTy {
116    CondCode,
117    Immediate,
118    Memory,
119    Register,
120    Token
121  } Kind;
122
123  SMLoc StartLoc, EndLoc;
124
125  union {
126    struct {
127      ARMCC::CondCodes Val;
128    } CC;
129
130    struct {
131      const char *Data;
132      unsigned Length;
133    } Tok;
134
135    struct {
136      unsigned RegNum;
137      bool Writeback;
138    } Reg;
139
140    struct {
141      const MCExpr *Val;
142    } Imm;
143
144    // This is for all forms of ARM address expressions
145    struct {
146      unsigned BaseRegNum;
147      unsigned OffsetRegNum; // used when OffsetIsReg is true
148      const MCExpr *Offset; // used when OffsetIsReg is false
149      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
150      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
151      unsigned
152        OffsetRegShifted : 1, // only used when OffsetIsReg is true
153        Preindexed : 1,
154        Postindexed : 1,
155        OffsetIsReg : 1,
156        Negative : 1, // only used when OffsetIsReg is true
157        Writeback : 1;
158    } Mem;
159
160  };
161
162  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
163    Kind = o.Kind;
164    StartLoc = o.StartLoc;
165    EndLoc = o.EndLoc;
166    switch (Kind) {
167    case CondCode:
168      CC = o.CC;
169      break;
170    case Token:
171      Tok = o.Tok;
172      break;
173    case Register:
174      Reg = o.Reg;
175      break;
176    case Immediate:
177      Imm = o.Imm;
178      break;
179    case Memory:
180      Mem = o.Mem;
181      break;
182    }
183  }
184
185  /// getStartLoc - Get the location of the first token of this operand.
186  SMLoc getStartLoc() const { return StartLoc; }
187  /// getEndLoc - Get the location of the last token of this operand.
188  SMLoc getEndLoc() const { return EndLoc; }
189
190  ARMCC::CondCodes getCondCode() const {
191    assert(Kind == CondCode && "Invalid access!");
192    return CC.Val;
193  }
194
195  StringRef getToken() const {
196    assert(Kind == Token && "Invalid access!");
197    return StringRef(Tok.Data, Tok.Length);
198  }
199
200  unsigned getReg() const {
201    assert(Kind == Register && "Invalid access!");
202    return Reg.RegNum;
203  }
204
205  const MCExpr *getImm() const {
206    assert(Kind == Immediate && "Invalid access!");
207    return Imm.Val;
208  }
209
210  bool isCondCode() const { return Kind == CondCode; }
211  bool isImm() const { return Kind == Immediate; }
212  bool isReg() const { return Kind == Register; }
213  bool isToken() const { return Kind == Token; }
214  bool isMemory() const { return Kind == Memory; }
215
216  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
217    // Add as immediates when possible.  Null MCExpr = 0.
218    if (Expr == 0)
219      Inst.addOperand(MCOperand::CreateImm(0));
220    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
221      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
222    else
223      Inst.addOperand(MCOperand::CreateExpr(Expr));
224  }
225
226  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
227    assert(N == 2 && "Invalid number of operands!");
228    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
229    // FIXME: What belongs here?
230    Inst.addOperand(MCOperand::CreateReg(0));
231  }
232
233  void addRegOperands(MCInst &Inst, unsigned N) const {
234    assert(N == 1 && "Invalid number of operands!");
235    Inst.addOperand(MCOperand::CreateReg(getReg()));
236  }
237
238  void addImmOperands(MCInst &Inst, unsigned N) const {
239    assert(N == 1 && "Invalid number of operands!");
240    addExpr(Inst, getImm());
241  }
242
243
244  bool isMemMode5() const {
245    if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted ||
246        Mem.Writeback || Mem.Negative)
247      return false;
248    // If there is an offset expression, make sure it's valid.
249    if (!Mem.Offset)
250      return true;
251    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
252    if (!CE)
253      return false;
254    // The offset must be a multiple of 4 in the range 0-1020.
255    int64_t Value = CE->getValue();
256    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
257  }
258
259  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
260    assert(N == 2 && isMemMode5() && "Invalid number of operands!");
261
262    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
263    assert(!Mem.OffsetIsReg && "invalid mode 5 operand");
264
265    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
266    // the difference?
267    if (Mem.Offset) {
268      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
269      assert(CE && "Non-constant mode 5 offset operand!");
270
271      // The MCInst offset operand doesn't include the low two bits (like
272      // the instruction encoding).
273      int64_t Offset = CE->getValue() / 4;
274      if (Offset >= 0)
275        Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
276                                                               Offset)));
277      else
278        Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
279                                                               -Offset)));
280    } else {
281      Inst.addOperand(MCOperand::CreateImm(0));
282    }
283  }
284
285  virtual void dump(raw_ostream &OS) const;
286
287  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
288    ARMOperand *Op = new ARMOperand(CondCode);
289    Op->CC.Val = CC;
290    Op->StartLoc = S;
291    Op->EndLoc = S;
292    return Op;
293  }
294
295  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
296    ARMOperand *Op = new ARMOperand(Token);
297    Op->Tok.Data = Str.data();
298    Op->Tok.Length = Str.size();
299    Op->StartLoc = S;
300    Op->EndLoc = S;
301    return Op;
302  }
303
304  static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S,
305                               SMLoc E) {
306    ARMOperand *Op = new ARMOperand(Register);
307    Op->Reg.RegNum = RegNum;
308    Op->Reg.Writeback = Writeback;
309    Op->StartLoc = S;
310    Op->EndLoc = E;
311    return Op;
312  }
313
314  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
315    ARMOperand *Op = new ARMOperand(Immediate);
316    Op->Imm.Val = Val;
317    Op->StartLoc = S;
318    Op->EndLoc = E;
319    return Op;
320  }
321
322  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
323                               const MCExpr *Offset, unsigned OffsetRegNum,
324                               bool OffsetRegShifted, enum ShiftType ShiftType,
325                               const MCExpr *ShiftAmount, bool Preindexed,
326                               bool Postindexed, bool Negative, bool Writeback,
327                               SMLoc S, SMLoc E) {
328    ARMOperand *Op = new ARMOperand(Memory);
329    Op->Mem.BaseRegNum = BaseRegNum;
330    Op->Mem.OffsetIsReg = OffsetIsReg;
331    Op->Mem.Offset = Offset;
332    Op->Mem.OffsetRegNum = OffsetRegNum;
333    Op->Mem.OffsetRegShifted = OffsetRegShifted;
334    Op->Mem.ShiftType = ShiftType;
335    Op->Mem.ShiftAmount = ShiftAmount;
336    Op->Mem.Preindexed = Preindexed;
337    Op->Mem.Postindexed = Postindexed;
338    Op->Mem.Negative = Negative;
339    Op->Mem.Writeback = Writeback;
340
341    Op->StartLoc = S;
342    Op->EndLoc = E;
343    return Op;
344  }
345
346private:
347  ARMOperand(KindTy K) : Kind(K) {}
348};
349
350} // end anonymous namespace.
351
352void ARMOperand::dump(raw_ostream &OS) const {
353  switch (Kind) {
354  case CondCode:
355    OS << ARMCondCodeToString(getCondCode());
356    break;
357  case Immediate:
358    getImm()->print(OS);
359    break;
360  case Memory:
361    OS << "<memory>";
362    break;
363  case Register:
364    OS << "<register " << getReg() << ">";
365    break;
366  case Token:
367    OS << "'" << getToken() << "'";
368    break;
369  }
370}
371
372/// @name Auto-generated Match Functions
373/// {
374
375static unsigned MatchRegisterName(StringRef Name);
376
377/// }
378
379/// Try to parse a register name.  The token must be an Identifier when called,
380/// and if it is a register name the token is eaten and the register number is
381/// returned.  Otherwise return -1.
382///
383int ARMAsmParser::TryParseRegister() {
384  const AsmToken &Tok = Parser.getTok();
385  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
386
387  // FIXME: Validate register for the current architecture; we have to do
388  // validation later, so maybe there is no need for this here.
389  int RegNum = MatchRegisterName(Tok.getString());
390  if (RegNum == -1)
391    return -1;
392  Parser.Lex(); // Eat identifier token.
393  return RegNum;
394}
395
396
397/// Try to parse a register name.  The token must be an Identifier when called,
398/// and if it is a register name the token is eaten and the register number is
399/// returned.  Otherwise return -1.
400///
401/// TODO this is likely to change to allow different register types and or to
402/// parse for a specific register type.
403ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() {
404  SMLoc S = Parser.getTok().getLoc();
405  int RegNo = TryParseRegister();
406  if (RegNo == -1) return 0;
407
408  SMLoc E = Parser.getTok().getLoc();
409
410  bool Writeback = false;
411  const AsmToken &ExclaimTok = Parser.getTok();
412  if (ExclaimTok.is(AsmToken::Exclaim)) {
413    E = ExclaimTok.getLoc();
414    Writeback = true;
415    Parser.Lex(); // Eat exclaim token
416  }
417
418  return ARMOperand::CreateReg(RegNo, Writeback, S, E);
419}
420
421/// Parse a register list, return it if successful else return null.  The first
422/// token must be a '{' when called.
423ARMOperand *ARMAsmParser::ParseRegisterList() {
424  SMLoc S, E;
425  assert(Parser.getTok().is(AsmToken::LCurly) &&
426         "Token is not an Left Curly Brace");
427  S = Parser.getTok().getLoc();
428  Parser.Lex(); // Eat left curly brace token.
429
430  const AsmToken &RegTok = Parser.getTok();
431  SMLoc RegLoc = RegTok.getLoc();
432  if (RegTok.isNot(AsmToken::Identifier)) {
433    Error(RegLoc, "register expected");
434    return 0;
435  }
436  int RegNum = MatchRegisterName(RegTok.getString());
437  if (RegNum == -1) {
438    Error(RegLoc, "register expected");
439    return 0;
440  }
441
442  Parser.Lex(); // Eat identifier token.
443  unsigned RegList = 1 << RegNum;
444
445  int HighRegNum = RegNum;
446  // TODO ranges like "{Rn-Rm}"
447  while (Parser.getTok().is(AsmToken::Comma)) {
448    Parser.Lex(); // Eat comma token.
449
450    const AsmToken &RegTok = Parser.getTok();
451    SMLoc RegLoc = RegTok.getLoc();
452    if (RegTok.isNot(AsmToken::Identifier)) {
453      Error(RegLoc, "register expected");
454      return 0;
455    }
456    int RegNum = MatchRegisterName(RegTok.getString());
457    if (RegNum == -1) {
458      Error(RegLoc, "register expected");
459      return 0;
460    }
461
462    if (RegList & (1 << RegNum))
463      Warning(RegLoc, "register duplicated in register list");
464    else if (RegNum <= HighRegNum)
465      Warning(RegLoc, "register not in ascending order in register list");
466    RegList |= 1 << RegNum;
467    HighRegNum = RegNum;
468
469    Parser.Lex(); // Eat identifier token.
470  }
471  const AsmToken &RCurlyTok = Parser.getTok();
472  if (RCurlyTok.isNot(AsmToken::RCurly)) {
473    Error(RCurlyTok.getLoc(), "'}' expected");
474    return 0;
475  }
476  E = RCurlyTok.getLoc();
477  Parser.Lex(); // Eat left curly brace token.
478
479  // FIXME: Need to return an operand!
480  Error(E, "FIXME: register list parsing not implemented");
481  return 0;
482}
483
484/// Parse an arm memory expression, return false if successful else return true
485/// or an error.  The first token must be a '[' when called.
486/// TODO Only preindexing and postindexing addressing are started, unindexed
487/// with option, etc are still to do.
488ARMOperand *ARMAsmParser::ParseMemory() {
489  SMLoc S, E;
490  assert(Parser.getTok().is(AsmToken::LBrac) &&
491         "Token is not an Left Bracket");
492  S = Parser.getTok().getLoc();
493  Parser.Lex(); // Eat left bracket token.
494
495  const AsmToken &BaseRegTok = Parser.getTok();
496  if (BaseRegTok.isNot(AsmToken::Identifier)) {
497    Error(BaseRegTok.getLoc(), "register expected");
498    return 0;
499  }
500  int BaseRegNum = TryParseRegister();
501  if (BaseRegNum == -1) {
502    Error(BaseRegTok.getLoc(), "register expected");
503    return 0;
504  }
505
506  bool Preindexed = false;
507  bool Postindexed = false;
508  bool OffsetIsReg = false;
509  bool Negative = false;
510  bool Writeback = false;
511
512  // First look for preindexed address forms, that is after the "[Rn" we now
513  // have to see if the next token is a comma.
514  const AsmToken &Tok = Parser.getTok();
515  if (Tok.is(AsmToken::Comma)) {
516    Preindexed = true;
517    Parser.Lex(); // Eat comma token.
518    int OffsetRegNum;
519    bool OffsetRegShifted;
520    enum ShiftType ShiftType;
521    const MCExpr *ShiftAmount;
522    const MCExpr *Offset;
523    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
524                             Offset, OffsetIsReg, OffsetRegNum, E))
525      return 0;
526    const AsmToken &RBracTok = Parser.getTok();
527    if (RBracTok.isNot(AsmToken::RBrac)) {
528      Error(RBracTok.getLoc(), "']' expected");
529      return 0;
530    }
531    E = RBracTok.getLoc();
532    Parser.Lex(); // Eat right bracket token.
533
534    const AsmToken &ExclaimTok = Parser.getTok();
535    if (ExclaimTok.is(AsmToken::Exclaim)) {
536      E = ExclaimTok.getLoc();
537      Writeback = true;
538      Parser.Lex(); // Eat exclaim token
539    }
540    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
541                                 OffsetRegShifted, ShiftType, ShiftAmount,
542                                 Preindexed, Postindexed, Negative, Writeback,
543                                 S, E);
544  }
545  // The "[Rn" we have so far was not followed by a comma.
546  else if (Tok.is(AsmToken::RBrac)) {
547    // If there's anything other than the right brace, this is a post indexing
548    // addressing form.
549    E = Tok.getLoc();
550    Parser.Lex(); // Eat right bracket token.
551
552    int OffsetRegNum = 0;
553    bool OffsetRegShifted = false;
554    enum ShiftType ShiftType;
555    const MCExpr *ShiftAmount;
556    const MCExpr *Offset = 0;
557
558    const AsmToken &NextTok = Parser.getTok();
559    if (NextTok.isNot(AsmToken::EndOfStatement)) {
560      Postindexed = true;
561      Writeback = true;
562      if (NextTok.isNot(AsmToken::Comma)) {
563        Error(NextTok.getLoc(), "',' expected");
564        return 0;
565      }
566      Parser.Lex(); // Eat comma token.
567      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
568                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
569                               E))
570        return 0;
571    }
572
573    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
574                                 OffsetRegShifted, ShiftType, ShiftAmount,
575                                 Preindexed, Postindexed, Negative, Writeback,
576                                 S, E);
577  }
578
579  return 0;
580}
581
582/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
583/// we will parse the following (were +/- means that a plus or minus is
584/// optional):
585///   +/-Rm
586///   +/-Rm, shift
587///   #offset
588/// we return false on success or an error otherwise.
589bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
590                                        bool &OffsetRegShifted,
591                                        enum ShiftType &ShiftType,
592                                        const MCExpr *&ShiftAmount,
593                                        const MCExpr *&Offset,
594                                        bool &OffsetIsReg,
595                                        int &OffsetRegNum,
596                                        SMLoc &E) {
597  Negative = false;
598  OffsetRegShifted = false;
599  OffsetIsReg = false;
600  OffsetRegNum = -1;
601  const AsmToken &NextTok = Parser.getTok();
602  E = NextTok.getLoc();
603  if (NextTok.is(AsmToken::Plus))
604    Parser.Lex(); // Eat plus token.
605  else if (NextTok.is(AsmToken::Minus)) {
606    Negative = true;
607    Parser.Lex(); // Eat minus token
608  }
609  // See if there is a register following the "[Rn," or "[Rn]," we have so far.
610  const AsmToken &OffsetRegTok = Parser.getTok();
611  if (OffsetRegTok.is(AsmToken::Identifier)) {
612    SMLoc CurLoc = OffsetRegTok.getLoc();
613    OffsetRegNum = TryParseRegister();
614    if (OffsetRegNum != -1) {
615      OffsetIsReg = true;
616      E = CurLoc;
617    }
618  }
619
620  // If we parsed a register as the offset then their can be a shift after that
621  if (OffsetRegNum != -1) {
622    // Look for a comma then a shift
623    const AsmToken &Tok = Parser.getTok();
624    if (Tok.is(AsmToken::Comma)) {
625      Parser.Lex(); // Eat comma token.
626
627      const AsmToken &Tok = Parser.getTok();
628      if (ParseShift(ShiftType, ShiftAmount, E))
629        return Error(Tok.getLoc(), "shift expected");
630      OffsetRegShifted = true;
631    }
632  }
633  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
634    // Look for #offset following the "[Rn," or "[Rn],"
635    const AsmToken &HashTok = Parser.getTok();
636    if (HashTok.isNot(AsmToken::Hash))
637      return Error(HashTok.getLoc(), "'#' expected");
638
639    Parser.Lex(); // Eat hash token.
640
641    if (getParser().ParseExpression(Offset))
642     return true;
643    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
644  }
645  return false;
646}
647
648/// ParseShift as one of these two:
649///   ( lsl | lsr | asr | ror ) , # shift_amount
650///   rrx
651/// and returns true if it parses a shift otherwise it returns false.
652bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
653                              SMLoc &E) {
654  const AsmToken &Tok = Parser.getTok();
655  if (Tok.isNot(AsmToken::Identifier))
656    return true;
657  StringRef ShiftName = Tok.getString();
658  if (ShiftName == "lsl" || ShiftName == "LSL")
659    St = Lsl;
660  else if (ShiftName == "lsr" || ShiftName == "LSR")
661    St = Lsr;
662  else if (ShiftName == "asr" || ShiftName == "ASR")
663    St = Asr;
664  else if (ShiftName == "ror" || ShiftName == "ROR")
665    St = Ror;
666  else if (ShiftName == "rrx" || ShiftName == "RRX")
667    St = Rrx;
668  else
669    return true;
670  Parser.Lex(); // Eat shift type token.
671
672  // Rrx stands alone.
673  if (St == Rrx)
674    return false;
675
676  // Otherwise, there must be a '#' and a shift amount.
677  const AsmToken &HashTok = Parser.getTok();
678  if (HashTok.isNot(AsmToken::Hash))
679    return Error(HashTok.getLoc(), "'#' expected");
680  Parser.Lex(); // Eat hash token.
681
682  if (getParser().ParseExpression(ShiftAmount))
683    return true;
684
685  return false;
686}
687
688/// Parse a arm instruction operand.  For now this parses the operand regardless
689/// of the mnemonic.
690ARMOperand *ARMAsmParser::ParseOperand() {
691  SMLoc S, E;
692
693  switch (getLexer().getKind()) {
694  case AsmToken::Identifier:
695    if (ARMOperand *Op = TryParseRegisterWithWriteBack())
696      return Op;
697
698    // This was not a register so parse other operands that start with an
699    // identifier (like labels) as expressions and create them as immediates.
700    const MCExpr *IdVal;
701    S = Parser.getTok().getLoc();
702    if (getParser().ParseExpression(IdVal))
703      return 0;
704    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
705    return ARMOperand::CreateImm(IdVal, S, E);
706  case AsmToken::LBrac:
707    return ParseMemory();
708  case AsmToken::LCurly:
709    return ParseRegisterList();
710  case AsmToken::Hash:
711    // #42 -> immediate.
712    // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
713    S = Parser.getTok().getLoc();
714    Parser.Lex();
715    const MCExpr *ImmVal;
716    if (getParser().ParseExpression(ImmVal))
717      return 0;
718    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
719    return ARMOperand::CreateImm(ImmVal, S, E);
720  default:
721    Error(Parser.getTok().getLoc(), "unexpected token in operand");
722    return 0;
723  }
724}
725
726/// Parse an arm instruction mnemonic followed by its operands.
727bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
728                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
729  // Create the leading tokens for the mnemonic, split by '.' characters.
730  size_t Start = 0, Next = Name.find('.');
731  StringRef Head = Name.slice(Start, Next);
732
733  // Determine the predicate, if any.
734  //
735  // FIXME: We need a way to check whether a prefix supports predication,
736  // otherwise we will end up with an ambiguity for instructions that happen to
737  // end with a predicate name.
738  // FIXME: Likewise, some arithmetic instructions have an 's' prefix which
739  // indicates to update the condition codes. Those instructions have an
740  // additional immediate operand which encodes the prefix as reg0 or CPSR.
741  // Just checking for a suffix of 's' definitely creates ambiguities; e.g,
742  // the SMMLS instruction.
743  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
744    .Case("eq", ARMCC::EQ)
745    .Case("ne", ARMCC::NE)
746    .Case("hs", ARMCC::HS)
747    .Case("lo", ARMCC::LO)
748    .Case("mi", ARMCC::MI)
749    .Case("pl", ARMCC::PL)
750    .Case("vs", ARMCC::VS)
751    .Case("vc", ARMCC::VC)
752    .Case("hi", ARMCC::HI)
753    .Case("ls", ARMCC::LS)
754    .Case("ge", ARMCC::GE)
755    .Case("lt", ARMCC::LT)
756    .Case("gt", ARMCC::GT)
757    .Case("le", ARMCC::LE)
758    .Case("al", ARMCC::AL)
759    .Default(~0U);
760
761  if (CC == ~0U ||
762      (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) {
763    CC = ARMCC::AL;
764  } else {
765    Head = Head.slice(0, Head.size() - 2);
766  }
767
768  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
769  // FIXME: Should only add this operand for predicated instructions
770  Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc));
771
772  // Add the remaining tokens in the mnemonic.
773  while (Next != StringRef::npos) {
774    Start = Next;
775    Next = Name.find('.', Start + 1);
776    Head = Name.slice(Start, Next);
777
778    Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
779  }
780
781  // Read the remaining operands.
782  if (getLexer().isNot(AsmToken::EndOfStatement)) {
783    // Read the first operand.
784    if (ARMOperand *Op = ParseOperand())
785      Operands.push_back(Op);
786    else {
787      Parser.EatToEndOfStatement();
788      return true;
789    }
790
791    while (getLexer().is(AsmToken::Comma)) {
792      Parser.Lex();  // Eat the comma.
793
794      // Parse and remember the operand.
795      if (ARMOperand *Op = ParseOperand())
796        Operands.push_back(Op);
797      else {
798        Parser.EatToEndOfStatement();
799        return true;
800      }
801    }
802  }
803
804  if (getLexer().isNot(AsmToken::EndOfStatement)) {
805    Parser.EatToEndOfStatement();
806    return TokError("unexpected token in argument list");
807  }
808  Parser.Lex(); // Consume the EndOfStatement
809  return false;
810}
811
812bool ARMAsmParser::
813MatchAndEmitInstruction(SMLoc IDLoc,
814                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
815                        MCStreamer &Out) {
816  MCInst Inst;
817  unsigned ErrorInfo;
818  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
819  case Match_Success:
820    Out.EmitInstruction(Inst);
821    return false;
822
823  case Match_MissingFeature:
824    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
825    return true;
826  case Match_InvalidOperand: {
827    SMLoc ErrorLoc = IDLoc;
828    if (ErrorInfo != ~0U) {
829      if (ErrorInfo >= Operands.size())
830        return Error(IDLoc, "too few operands for instruction");
831
832      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
833      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
834    }
835
836    return Error(ErrorLoc, "invalid operand for instruction");
837  }
838  case Match_MnemonicFail:
839    return Error(IDLoc, "unrecognized instruction mnemonic");
840  }
841
842  llvm_unreachable("Implement any new match types added!");
843}
844
845
846
847/// ParseDirective parses the arm specific directives
848bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
849  StringRef IDVal = DirectiveID.getIdentifier();
850  if (IDVal == ".word")
851    return ParseDirectiveWord(4, DirectiveID.getLoc());
852  else if (IDVal == ".thumb")
853    return ParseDirectiveThumb(DirectiveID.getLoc());
854  else if (IDVal == ".thumb_func")
855    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
856  else if (IDVal == ".code")
857    return ParseDirectiveCode(DirectiveID.getLoc());
858  else if (IDVal == ".syntax")
859    return ParseDirectiveSyntax(DirectiveID.getLoc());
860  return true;
861}
862
863/// ParseDirectiveWord
864///  ::= .word [ expression (, expression)* ]
865bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
866  if (getLexer().isNot(AsmToken::EndOfStatement)) {
867    for (;;) {
868      const MCExpr *Value;
869      if (getParser().ParseExpression(Value))
870        return true;
871
872      getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
873
874      if (getLexer().is(AsmToken::EndOfStatement))
875        break;
876
877      // FIXME: Improve diagnostic.
878      if (getLexer().isNot(AsmToken::Comma))
879        return Error(L, "unexpected token in directive");
880      Parser.Lex();
881    }
882  }
883
884  Parser.Lex();
885  return false;
886}
887
888/// ParseDirectiveThumb
889///  ::= .thumb
890bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
891  if (getLexer().isNot(AsmToken::EndOfStatement))
892    return Error(L, "unexpected token in directive");
893  Parser.Lex();
894
895  // TODO: set thumb mode
896  // TODO: tell the MC streamer the mode
897  // getParser().getStreamer().Emit???();
898  return false;
899}
900
901/// ParseDirectiveThumbFunc
902///  ::= .thumbfunc symbol_name
903bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
904  const AsmToken &Tok = Parser.getTok();
905  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
906    return Error(L, "unexpected token in .syntax directive");
907  Parser.Lex(); // Consume the identifier token.
908
909  if (getLexer().isNot(AsmToken::EndOfStatement))
910    return Error(L, "unexpected token in directive");
911  Parser.Lex();
912
913  // TODO: mark symbol as a thumb symbol
914  // getParser().getStreamer().Emit???();
915  return false;
916}
917
918/// ParseDirectiveSyntax
919///  ::= .syntax unified | divided
920bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
921  const AsmToken &Tok = Parser.getTok();
922  if (Tok.isNot(AsmToken::Identifier))
923    return Error(L, "unexpected token in .syntax directive");
924  StringRef Mode = Tok.getString();
925  if (Mode == "unified" || Mode == "UNIFIED")
926    Parser.Lex();
927  else if (Mode == "divided" || Mode == "DIVIDED")
928    Parser.Lex();
929  else
930    return Error(L, "unrecognized syntax mode in .syntax directive");
931
932  if (getLexer().isNot(AsmToken::EndOfStatement))
933    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
934  Parser.Lex();
935
936  // TODO tell the MC streamer the mode
937  // getParser().getStreamer().Emit???();
938  return false;
939}
940
941/// ParseDirectiveCode
942///  ::= .code 16 | 32
943bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
944  const AsmToken &Tok = Parser.getTok();
945  if (Tok.isNot(AsmToken::Integer))
946    return Error(L, "unexpected token in .code directive");
947  int64_t Val = Parser.getTok().getIntVal();
948  if (Val == 16)
949    Parser.Lex();
950  else if (Val == 32)
951    Parser.Lex();
952  else
953    return Error(L, "invalid operand to .code directive");
954
955  if (getLexer().isNot(AsmToken::EndOfStatement))
956    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
957  Parser.Lex();
958
959  // TODO tell the MC streamer the mode
960  // getParser().getStreamer().Emit???();
961  return false;
962}
963
964extern "C" void LLVMInitializeARMAsmLexer();
965
966/// Force static initialization.
967extern "C" void LLVMInitializeARMAsmParser() {
968  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
969  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
970  LLVMInitializeARMAsmLexer();
971}
972
973#define GET_REGISTER_MATCHER
974#define GET_MATCHER_IMPLEMENTATION
975#include "ARMGenAsmMatcher.inc"
976