ARMAsmParser.cpp revision 469ebbe148b18a78963e8bc3fa7ae8e5700d8d27
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "ARMSubtarget.h"
12#include "llvm/MC/MCParser/MCAsmLexer.h"
13#include "llvm/MC/MCParser/MCAsmParser.h"
14#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
15#include "llvm/MC/MCStreamer.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/Target/TargetRegistry.h"
19#include "llvm/Target/TargetAsmParser.h"
20#include "llvm/Support/SourceMgr.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSwitch.h"
24#include "llvm/ADT/Twine.h"
25using namespace llvm;
26
// The shift types for register controlled shifts in arm memory addressing.
// ARMAsmParser::ParseShift selects one of these from the shift mnemonic it
// reads (each mnemonic is accepted in all-lowercase or all-uppercase form).
enum ShiftType {
  Lsl, // "lsl"
  Lsr, // "lsr"
  Asr, // "asr"
  Ror, // "ror"
  Rrx  // "rrx"; the only shift that is parsed without a '#' shift amount
};
35
36namespace {
37  struct ARMOperand;
38
39class ARMAsmParser : public TargetAsmParser {
40  MCAsmParser &Parser;
41  TargetMachine &TM;
42
43private:
44  MCAsmParser &getParser() const { return Parser; }
45
46  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
47
48  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
49
50  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
51
52  int TryParseRegister();
53  ARMOperand *TryParseRegisterWithWriteBack();
54  ARMOperand *ParseRegisterList();
55  ARMOperand *ParseMemory();
56
57  bool ParseMemoryOffsetReg(bool &Negative,
58                            bool &OffsetRegShifted,
59                            enum ShiftType &ShiftType,
60                            const MCExpr *&ShiftAmount,
61                            const MCExpr *&Offset,
62                            bool &OffsetIsReg,
63                            int &OffsetRegNum,
64                            SMLoc &E);
65
66  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
67
68  ARMOperand *ParseOperand();
69
70  bool ParseDirectiveWord(unsigned Size, SMLoc L);
71
72  bool ParseDirectiveThumb(SMLoc L);
73
74  bool ParseDirectiveThumbFunc(SMLoc L);
75
76  bool ParseDirectiveCode(SMLoc L);
77
78  bool ParseDirectiveSyntax(SMLoc L);
79
80  bool MatchAndEmitInstruction(SMLoc IDLoc,
81                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
82                               MCStreamer &Out);
83
84  /// @name Auto-generated Match Functions
85  /// {
86
87#define GET_ASSEMBLER_HEADER
88#include "ARMGenAsmMatcher.inc"
89
90  /// }
91
92
93public:
94  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
95    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
96      // Initialize the set of available features.
97      setAvailableFeatures(ComputeAvailableFeatures(
98          &TM.getSubtarget<ARMSubtarget>()));
99    }
100
101  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
102                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
103
104  virtual bool ParseDirective(AsmToken DirectiveID);
105};
106} // end anonymous namespace
107
108namespace {
109
/// ARMOperand - Instances of this class represent one parsed operand of an
/// ARM machine instruction.  Kind selects which member of the anonymous union
/// is valid; operands are created through the static Create* functions.
struct ARMOperand : public MCParsedAsmOperand {
public:
  enum KindTy {
    CondCode,
    Immediate,
    Memory,
    Register,
    Token
  } Kind;

  // Source range covered by this operand.
  SMLoc StartLoc, EndLoc;

  union {
    // Valid when Kind == CondCode.
    struct {
      ARMCC::CondCodes Val;
    } CC;

    // Valid when Kind == Token.  Holds the pointer and length of the StringRef
    // given to CreateToken; the characters are not copied.
    struct {
      const char *Data;
      unsigned Length;
    } Tok;

    // Valid when Kind == Register.
    struct {
      unsigned RegNum;
      bool Writeback;
    } Reg;

    // Valid when Kind == Immediate.
    struct {
      const MCExpr *Val;
    } Imm;

    // This is for all forms of ARM address expressions
    struct {
      unsigned BaseRegNum;
      unsigned OffsetRegNum; // used when OffsetIsReg is true
      const MCExpr *Offset; // used when OffsetIsReg is false
      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
      unsigned
        OffsetRegShifted : 1, // only used when OffsetIsReg is true
        Preindexed : 1,
        Postindexed : 1,
        OffsetIsReg : 1,
        Negative : 1, // only used when OffsetIsReg is true
        Writeback : 1;
    } Mem;

  };

  /// Copy constructor.  Copies the kind and locations, then only the union
  /// member that is active for that kind.
  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
    Kind = o.Kind;
    StartLoc = o.StartLoc;
    EndLoc = o.EndLoc;
    switch (Kind) {
    case CondCode:
      CC = o.CC;
      break;
    case Token:
      Tok = o.Tok;
      break;
    case Register:
      Reg = o.Reg;
      break;
    case Immediate:
      Imm = o.Imm;
      break;
    case Memory:
      Mem = o.Mem;
      break;
    }
  }

  /// getStartLoc - Get the location of the first token of this operand.
  SMLoc getStartLoc() const { return StartLoc; }
  /// getEndLoc - Get the location of the last token of this operand.
  SMLoc getEndLoc() const { return EndLoc; }

  // Checked accessors: each asserts that the operand has the matching kind.

  ARMCC::CondCodes getCondCode() const {
    assert(Kind == CondCode && "Invalid access!");
    return CC.Val;
  }

  StringRef getToken() const {
    assert(Kind == Token && "Invalid access!");
    return StringRef(Tok.Data, Tok.Length);
  }

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return Reg.RegNum;
  }

  const MCExpr *getImm() const {
    assert(Kind == Immediate && "Invalid access!");
    return Imm.Val;
  }

  // Kind predicates.
  bool isCondCode() const { return Kind == CondCode; }
  bool isImm() const { return Kind == Immediate; }
  bool isReg() const { return Kind == Register; }
  bool isToken() const { return Kind == Token; }
  bool isMemory() const { return Kind == Memory; }

  /// addExpr - Append Expr to Inst as a single operand: immediate 0 for a null
  /// expression, the constant value for an MCConstantExpr, and an expression
  /// operand otherwise.
  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
    // Add as immediates when possible.  Null MCExpr = 0.
    if (Expr == 0)
      Inst.addOperand(MCOperand::CreateImm(0));
    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
    else
      Inst.addOperand(MCOperand::CreateExpr(Expr));
  }

  /// addCondCodeOperands - Append the two operands of a predicate: the
  /// condition code as an immediate followed by register 0.
  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
    assert(N == 2 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
    // FIXME: What belongs here?
    Inst.addOperand(MCOperand::CreateReg(0));
  }

  /// addRegOperands - Append this register as a single register operand.
  void addRegOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(getReg()));
  }

  /// addImmOperands - Append this immediate as a single operand via addExpr.
  void addImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    addExpr(Inst, getImm());
  }


  /// isMemMode5 - True for a memory operand that has no register offset, no
  /// shift, no writeback and no negative flag, and whose offset is either
  /// absent or a constant that passes the range check below.
  bool isMemMode5() const {
    if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted ||
        Mem.Writeback || Mem.Negative)
      return false;
    // If there is an offset expression, make sure it's valid.
    if (!Mem.Offset)
      return true;
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
    if (!CE)
      return false;
    // The offset must be a multiple of 4 in the range -1020 to 1020.
    int64_t Value = CE->getValue();
    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
  }

  /// addMemMode5Operands - Append the base register followed by the offset
  /// divided by 4 (or 0 when there is no offset expression).
  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
    assert(N == 2 && isMemMode5() && "Invalid number of operands!");

    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
    assert(!Mem.OffsetIsReg && "invalid mode 5 operand");
    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
    // the difference?
    if (Mem.Offset) {
      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
      assert(CE && "non-constant mode 5 offset operand!");
      // The MCInst offset operand doesn't include the low two bits (like
      // the instruction encoding).
      Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
    } else
      Inst.addOperand(MCOperand::CreateImm(0));
  }

  virtual void dump(raw_ostream &OS) const;

  // Factory functions.  Each allocates a new operand with `new`, fills in the
  // union member for its kind and returns it to the caller.

  /// CreateCondCode - Condition code operand; start and end are both S.
  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
    ARMOperand *Op = new ARMOperand(CondCode);
    Op->CC.Val = CC;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }

  /// CreateToken - Token operand referring to the characters of Str (not
  /// copied); start and end are both S.
  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
    ARMOperand *Op = new ARMOperand(Token);
    Op->Tok.Data = Str.data();
    Op->Tok.Length = Str.size();
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }

  /// CreateReg - Register operand covering the range S..E.
  static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S,
                               SMLoc E) {
    ARMOperand *Op = new ARMOperand(Register);
    Op->Reg.RegNum = RegNum;
    Op->Reg.Writeback = Writeback;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  /// CreateImm - Immediate operand holding Val, covering the range S..E.
  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
    ARMOperand *Op = new ARMOperand(Immediate);
    Op->Imm.Val = Val;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  /// CreateMem - Memory operand covering the range S..E.  Every argument is
  /// stored into the corresponding Mem field unconditionally.
  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
                               const MCExpr *Offset, unsigned OffsetRegNum,
                               bool OffsetRegShifted, enum ShiftType ShiftType,
                               const MCExpr *ShiftAmount, bool Preindexed,
                               bool Postindexed, bool Negative, bool Writeback,
                               SMLoc S, SMLoc E) {
    ARMOperand *Op = new ARMOperand(Memory);
    Op->Mem.BaseRegNum = BaseRegNum;
    Op->Mem.OffsetIsReg = OffsetIsReg;
    Op->Mem.Offset = Offset;
    Op->Mem.OffsetRegNum = OffsetRegNum;
    Op->Mem.OffsetRegShifted = OffsetRegShifted;
    Op->Mem.ShiftType = ShiftType;
    Op->Mem.ShiftAmount = ShiftAmount;
    Op->Mem.Preindexed = Preindexed;
    Op->Mem.Postindexed = Postindexed;
    Op->Mem.Negative = Negative;
    Op->Mem.Writeback = Writeback;

    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

private:
  // Only the Create* factories construct operands from a bare kind; the union
  // payload and the locations are filled in by the factory afterwards.
  ARMOperand(KindTy K) : Kind(K) {}
};
339
340} // end anonymous namespace.
341
342void ARMOperand::dump(raw_ostream &OS) const {
343  switch (Kind) {
344  case CondCode:
345    OS << ARMCondCodeToString(getCondCode());
346    break;
347  case Immediate:
348    getImm()->print(OS);
349    break;
350  case Memory:
351    OS << "<memory>";
352    break;
353  case Register:
354    OS << "<register " << getReg() << ">";
355    break;
356  case Token:
357    OS << "'" << getToken() << "'";
358    break;
359  }
360}
361
362/// @name Auto-generated Match Functions
363/// {
364
365static unsigned MatchRegisterName(StringRef Name);
366
367/// }
368
369/// Try to parse a register name.  The token must be an Identifier when called,
370/// and if it is a register name the token is eaten and the register number is
371/// returned.  Otherwise return -1.
372///
373int ARMAsmParser::TryParseRegister() {
374  const AsmToken &Tok = Parser.getTok();
375  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
376
377  // FIXME: Validate register for the current architecture; we have to do
378  // validation later, so maybe there is no need for this here.
379  int RegNum = MatchRegisterName(Tok.getString());
380  if (RegNum == -1)
381    return -1;
382  Parser.Lex(); // Eat identifier token.
383  return RegNum;
384}
385
386
387/// Try to parse a register name.  The token must be an Identifier when called,
388/// and if it is a register name the token is eaten and the register number is
389/// returned.  Otherwise return -1.
390///
391/// TODO this is likely to change to allow different register types and or to
392/// parse for a specific register type.
393ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() {
394  SMLoc S = Parser.getTok().getLoc();
395  int RegNo = TryParseRegister();
396  if (RegNo == -1) return 0;
397
398  SMLoc E = Parser.getTok().getLoc();
399
400  bool Writeback = false;
401  const AsmToken &ExclaimTok = Parser.getTok();
402  if (ExclaimTok.is(AsmToken::Exclaim)) {
403    E = ExclaimTok.getLoc();
404    Writeback = true;
405    Parser.Lex(); // Eat exclaim token
406  }
407
408  return ARMOperand::CreateReg(RegNo, Writeback, S, E);
409}
410
411/// Parse a register list, return it if successful else return null.  The first
412/// token must be a '{' when called.
413ARMOperand *ARMAsmParser::ParseRegisterList() {
414  SMLoc S, E;
415  assert(Parser.getTok().is(AsmToken::LCurly) &&
416         "Token is not an Left Curly Brace");
417  S = Parser.getTok().getLoc();
418  Parser.Lex(); // Eat left curly brace token.
419
420  const AsmToken &RegTok = Parser.getTok();
421  SMLoc RegLoc = RegTok.getLoc();
422  if (RegTok.isNot(AsmToken::Identifier)) {
423    Error(RegLoc, "register expected");
424    return 0;
425  }
426  int RegNum = MatchRegisterName(RegTok.getString());
427  if (RegNum == -1) {
428    Error(RegLoc, "register expected");
429    return 0;
430  }
431
432  Parser.Lex(); // Eat identifier token.
433  unsigned RegList = 1 << RegNum;
434
435  int HighRegNum = RegNum;
436  // TODO ranges like "{Rn-Rm}"
437  while (Parser.getTok().is(AsmToken::Comma)) {
438    Parser.Lex(); // Eat comma token.
439
440    const AsmToken &RegTok = Parser.getTok();
441    SMLoc RegLoc = RegTok.getLoc();
442    if (RegTok.isNot(AsmToken::Identifier)) {
443      Error(RegLoc, "register expected");
444      return 0;
445    }
446    int RegNum = MatchRegisterName(RegTok.getString());
447    if (RegNum == -1) {
448      Error(RegLoc, "register expected");
449      return 0;
450    }
451
452    if (RegList & (1 << RegNum))
453      Warning(RegLoc, "register duplicated in register list");
454    else if (RegNum <= HighRegNum)
455      Warning(RegLoc, "register not in ascending order in register list");
456    RegList |= 1 << RegNum;
457    HighRegNum = RegNum;
458
459    Parser.Lex(); // Eat identifier token.
460  }
461  const AsmToken &RCurlyTok = Parser.getTok();
462  if (RCurlyTok.isNot(AsmToken::RCurly)) {
463    Error(RCurlyTok.getLoc(), "'}' expected");
464    return 0;
465  }
466  E = RCurlyTok.getLoc();
467  Parser.Lex(); // Eat left curly brace token.
468
469  // FIXME: Need to return an operand!
470  Error(E, "FIXME: register list parsing not implemented");
471  return 0;
472}
473
474/// Parse an arm memory expression, return false if successful else return true
475/// or an error.  The first token must be a '[' when called.
476/// TODO Only preindexing and postindexing addressing are started, unindexed
477/// with option, etc are still to do.
478ARMOperand *ARMAsmParser::ParseMemory() {
479  SMLoc S, E;
480  assert(Parser.getTok().is(AsmToken::LBrac) &&
481         "Token is not an Left Bracket");
482  S = Parser.getTok().getLoc();
483  Parser.Lex(); // Eat left bracket token.
484
485  const AsmToken &BaseRegTok = Parser.getTok();
486  if (BaseRegTok.isNot(AsmToken::Identifier)) {
487    Error(BaseRegTok.getLoc(), "register expected");
488    return 0;
489  }
490  int BaseRegNum = TryParseRegister();
491  if (BaseRegNum == -1) {
492    Error(BaseRegTok.getLoc(), "register expected");
493    return 0;
494  }
495
496  bool Preindexed = false;
497  bool Postindexed = false;
498  bool OffsetIsReg = false;
499  bool Negative = false;
500  bool Writeback = false;
501
502  // First look for preindexed address forms, that is after the "[Rn" we now
503  // have to see if the next token is a comma.
504  const AsmToken &Tok = Parser.getTok();
505  if (Tok.is(AsmToken::Comma)) {
506    Preindexed = true;
507    Parser.Lex(); // Eat comma token.
508    int OffsetRegNum;
509    bool OffsetRegShifted;
510    enum ShiftType ShiftType;
511    const MCExpr *ShiftAmount;
512    const MCExpr *Offset;
513    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
514                             Offset, OffsetIsReg, OffsetRegNum, E))
515      return 0;
516    const AsmToken &RBracTok = Parser.getTok();
517    if (RBracTok.isNot(AsmToken::RBrac)) {
518      Error(RBracTok.getLoc(), "']' expected");
519      return 0;
520    }
521    E = RBracTok.getLoc();
522    Parser.Lex(); // Eat right bracket token.
523
524    const AsmToken &ExclaimTok = Parser.getTok();
525    if (ExclaimTok.is(AsmToken::Exclaim)) {
526      E = ExclaimTok.getLoc();
527      Writeback = true;
528      Parser.Lex(); // Eat exclaim token
529    }
530    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
531                                 OffsetRegShifted, ShiftType, ShiftAmount,
532                                 Preindexed, Postindexed, Negative, Writeback,
533                                 S, E);
534  }
535  // The "[Rn" we have so far was not followed by a comma.
536  else if (Tok.is(AsmToken::RBrac)) {
537    // If there's anything other than the right brace, this is a post indexing
538    // addressing form.
539    E = Tok.getLoc();
540    Parser.Lex(); // Eat right bracket token.
541
542    int OffsetRegNum = 0;
543    bool OffsetRegShifted = false;
544    enum ShiftType ShiftType;
545    const MCExpr *ShiftAmount;
546    const MCExpr *Offset = 0;
547
548    const AsmToken &NextTok = Parser.getTok();
549    if (NextTok.isNot(AsmToken::EndOfStatement)) {
550      Postindexed = true;
551      Writeback = true;
552      if (NextTok.isNot(AsmToken::Comma)) {
553        Error(NextTok.getLoc(), "',' expected");
554        return 0;
555      }
556      Parser.Lex(); // Eat comma token.
557      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
558                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
559                               E))
560        return 0;
561    }
562
563    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
564                                 OffsetRegShifted, ShiftType, ShiftAmount,
565                                 Preindexed, Postindexed, Negative, Writeback,
566                                 S, E);
567  }
568
569  return 0;
570}
571
572/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
573/// we will parse the following (were +/- means that a plus or minus is
574/// optional):
575///   +/-Rm
576///   +/-Rm, shift
577///   #offset
578/// we return false on success or an error otherwise.
579bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
580                                        bool &OffsetRegShifted,
581                                        enum ShiftType &ShiftType,
582                                        const MCExpr *&ShiftAmount,
583                                        const MCExpr *&Offset,
584                                        bool &OffsetIsReg,
585                                        int &OffsetRegNum,
586                                        SMLoc &E) {
587  Negative = false;
588  OffsetRegShifted = false;
589  OffsetIsReg = false;
590  OffsetRegNum = -1;
591  const AsmToken &NextTok = Parser.getTok();
592  E = NextTok.getLoc();
593  if (NextTok.is(AsmToken::Plus))
594    Parser.Lex(); // Eat plus token.
595  else if (NextTok.is(AsmToken::Minus)) {
596    Negative = true;
597    Parser.Lex(); // Eat minus token
598  }
599  // See if there is a register following the "[Rn," or "[Rn]," we have so far.
600  const AsmToken &OffsetRegTok = Parser.getTok();
601  if (OffsetRegTok.is(AsmToken::Identifier)) {
602    SMLoc CurLoc = OffsetRegTok.getLoc();
603    OffsetRegNum = TryParseRegister();
604    if (OffsetRegNum != -1) {
605      OffsetIsReg = true;
606      E = CurLoc;
607    }
608  }
609
610  // If we parsed a register as the offset then their can be a shift after that
611  if (OffsetRegNum != -1) {
612    // Look for a comma then a shift
613    const AsmToken &Tok = Parser.getTok();
614    if (Tok.is(AsmToken::Comma)) {
615      Parser.Lex(); // Eat comma token.
616
617      const AsmToken &Tok = Parser.getTok();
618      if (ParseShift(ShiftType, ShiftAmount, E))
619        return Error(Tok.getLoc(), "shift expected");
620      OffsetRegShifted = true;
621    }
622  }
623  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
624    // Look for #offset following the "[Rn," or "[Rn],"
625    const AsmToken &HashTok = Parser.getTok();
626    if (HashTok.isNot(AsmToken::Hash))
627      return Error(HashTok.getLoc(), "'#' expected");
628
629    Parser.Lex(); // Eat hash token.
630
631    if (getParser().ParseExpression(Offset))
632     return true;
633    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
634  }
635  return false;
636}
637
638/// ParseShift as one of these two:
639///   ( lsl | lsr | asr | ror ) , # shift_amount
640///   rrx
641/// and returns true if it parses a shift otherwise it returns false.
642bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
643                              SMLoc &E) {
644  const AsmToken &Tok = Parser.getTok();
645  if (Tok.isNot(AsmToken::Identifier))
646    return true;
647  StringRef ShiftName = Tok.getString();
648  if (ShiftName == "lsl" || ShiftName == "LSL")
649    St = Lsl;
650  else if (ShiftName == "lsr" || ShiftName == "LSR")
651    St = Lsr;
652  else if (ShiftName == "asr" || ShiftName == "ASR")
653    St = Asr;
654  else if (ShiftName == "ror" || ShiftName == "ROR")
655    St = Ror;
656  else if (ShiftName == "rrx" || ShiftName == "RRX")
657    St = Rrx;
658  else
659    return true;
660  Parser.Lex(); // Eat shift type token.
661
662  // Rrx stands alone.
663  if (St == Rrx)
664    return false;
665
666  // Otherwise, there must be a '#' and a shift amount.
667  const AsmToken &HashTok = Parser.getTok();
668  if (HashTok.isNot(AsmToken::Hash))
669    return Error(HashTok.getLoc(), "'#' expected");
670  Parser.Lex(); // Eat hash token.
671
672  if (getParser().ParseExpression(ShiftAmount))
673    return true;
674
675  return false;
676}
677
678/// Parse a arm instruction operand.  For now this parses the operand regardless
679/// of the mnemonic.
680ARMOperand *ARMAsmParser::ParseOperand() {
681  SMLoc S, E;
682
683  switch (getLexer().getKind()) {
684  case AsmToken::Identifier:
685    if (ARMOperand *Op = TryParseRegisterWithWriteBack())
686      return Op;
687
688    // This was not a register so parse other operands that start with an
689    // identifier (like labels) as expressions and create them as immediates.
690    const MCExpr *IdVal;
691    S = Parser.getTok().getLoc();
692    if (getParser().ParseExpression(IdVal))
693      return 0;
694    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
695    return ARMOperand::CreateImm(IdVal, S, E);
696  case AsmToken::LBrac:
697    return ParseMemory();
698  case AsmToken::LCurly:
699    return ParseRegisterList();
700  case AsmToken::Hash:
701    // #42 -> immediate.
702    // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
703    S = Parser.getTok().getLoc();
704    Parser.Lex();
705    const MCExpr *ImmVal;
706    if (getParser().ParseExpression(ImmVal))
707      return 0;
708    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
709    return ARMOperand::CreateImm(ImmVal, S, E);
710  default:
711    Error(Parser.getTok().getLoc(), "unexpected token in operand");
712    return 0;
713  }
714}
715
716/// Parse an arm instruction mnemonic followed by its operands.
717bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
718                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
719  // Create the leading tokens for the mnemonic, split by '.' characters.
720  size_t Start = 0, Next = Name.find('.');
721  StringRef Head = Name.slice(Start, Next);
722
723  // Determine the predicate, if any.
724  //
725  // FIXME: We need a way to check whether a prefix supports predication,
726  // otherwise we will end up with an ambiguity for instructions that happen to
727  // end with a predicate name.
728  // FIXME: Likewise, some arithmetic instructions have an 's' prefix which
729  // indicates to update the condition codes. Those instructions have an
730  // additional immediate operand which encodes the prefix as reg0 or CPSR.
731  // Just checking for a suffix of 's' definitely creates ambiguities; e.g,
732  // the SMMLS instruction.
733  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
734    .Case("eq", ARMCC::EQ)
735    .Case("ne", ARMCC::NE)
736    .Case("hs", ARMCC::HS)
737    .Case("lo", ARMCC::LO)
738    .Case("mi", ARMCC::MI)
739    .Case("pl", ARMCC::PL)
740    .Case("vs", ARMCC::VS)
741    .Case("vc", ARMCC::VC)
742    .Case("hi", ARMCC::HI)
743    .Case("ls", ARMCC::LS)
744    .Case("ge", ARMCC::GE)
745    .Case("lt", ARMCC::LT)
746    .Case("gt", ARMCC::GT)
747    .Case("le", ARMCC::LE)
748    .Case("al", ARMCC::AL)
749    .Default(~0U);
750
751  if (CC == ~0U ||
752      (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) {
753    CC = ARMCC::AL;
754  } else {
755    Head = Head.slice(0, Head.size() - 2);
756  }
757
758  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
759  // FIXME: Should only add this operand for predicated instructions
760  Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc));
761
762  // Add the remaining tokens in the mnemonic.
763  while (Next != StringRef::npos) {
764    Start = Next;
765    Next = Name.find('.', Start + 1);
766    Head = Name.slice(Start, Next);
767
768    Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
769  }
770
771  // Read the remaining operands.
772  if (getLexer().isNot(AsmToken::EndOfStatement)) {
773    // Read the first operand.
774    if (ARMOperand *Op = ParseOperand())
775      Operands.push_back(Op);
776    else {
777      Parser.EatToEndOfStatement();
778      return true;
779    }
780
781    while (getLexer().is(AsmToken::Comma)) {
782      Parser.Lex();  // Eat the comma.
783
784      // Parse and remember the operand.
785      if (ARMOperand *Op = ParseOperand())
786        Operands.push_back(Op);
787      else {
788        Parser.EatToEndOfStatement();
789        return true;
790      }
791    }
792  }
793
794  if (getLexer().isNot(AsmToken::EndOfStatement)) {
795    Parser.EatToEndOfStatement();
796    return TokError("unexpected token in argument list");
797  }
798  Parser.Lex(); // Consume the EndOfStatement
799  return false;
800}
801
802bool ARMAsmParser::
803MatchAndEmitInstruction(SMLoc IDLoc,
804                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
805                        MCStreamer &Out) {
806  MCInst Inst;
807  unsigned ErrorInfo;
808  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
809  case Match_Success:
810    Out.EmitInstruction(Inst);
811    return false;
812
813  case Match_MissingFeature:
814    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
815    return true;
816  case Match_InvalidOperand: {
817    SMLoc ErrorLoc = IDLoc;
818    if (ErrorInfo != ~0U) {
819      if (ErrorInfo >= Operands.size())
820        return Error(IDLoc, "too few operands for instruction");
821
822      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
823      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
824    }
825
826    return Error(ErrorLoc, "invalid operand for instruction");
827  }
828  case Match_MnemonicFail:
829    return Error(IDLoc, "unrecognized instruction mnemonic");
830  }
831
832  llvm_unreachable("Implement any new match types added!");
833}
834
835
836
837/// ParseDirective parses the arm specific directives
838bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
839  StringRef IDVal = DirectiveID.getIdentifier();
840  if (IDVal == ".word")
841    return ParseDirectiveWord(4, DirectiveID.getLoc());
842  else if (IDVal == ".thumb")
843    return ParseDirectiveThumb(DirectiveID.getLoc());
844  else if (IDVal == ".thumb_func")
845    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
846  else if (IDVal == ".code")
847    return ParseDirectiveCode(DirectiveID.getLoc());
848  else if (IDVal == ".syntax")
849    return ParseDirectiveSyntax(DirectiveID.getLoc());
850  return true;
851}
852
853/// ParseDirectiveWord
854///  ::= .word [ expression (, expression)* ]
855bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
856  if (getLexer().isNot(AsmToken::EndOfStatement)) {
857    for (;;) {
858      const MCExpr *Value;
859      if (getParser().ParseExpression(Value))
860        return true;
861
862      getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
863
864      if (getLexer().is(AsmToken::EndOfStatement))
865        break;
866
867      // FIXME: Improve diagnostic.
868      if (getLexer().isNot(AsmToken::Comma))
869        return Error(L, "unexpected token in directive");
870      Parser.Lex();
871    }
872  }
873
874  Parser.Lex();
875  return false;
876}
877
878/// ParseDirectiveThumb
879///  ::= .thumb
880bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
881  if (getLexer().isNot(AsmToken::EndOfStatement))
882    return Error(L, "unexpected token in directive");
883  Parser.Lex();
884
885  // TODO: set thumb mode
886  // TODO: tell the MC streamer the mode
887  // getParser().getStreamer().Emit???();
888  return false;
889}
890
891/// ParseDirectiveThumbFunc
892///  ::= .thumbfunc symbol_name
893bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
894  const AsmToken &Tok = Parser.getTok();
895  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
896    return Error(L, "unexpected token in .syntax directive");
897  Parser.Lex(); // Consume the identifier token.
898
899  if (getLexer().isNot(AsmToken::EndOfStatement))
900    return Error(L, "unexpected token in directive");
901  Parser.Lex();
902
903  // TODO: mark symbol as a thumb symbol
904  // getParser().getStreamer().Emit???();
905  return false;
906}
907
908/// ParseDirectiveSyntax
909///  ::= .syntax unified | divided
910bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
911  const AsmToken &Tok = Parser.getTok();
912  if (Tok.isNot(AsmToken::Identifier))
913    return Error(L, "unexpected token in .syntax directive");
914  StringRef Mode = Tok.getString();
915  if (Mode == "unified" || Mode == "UNIFIED")
916    Parser.Lex();
917  else if (Mode == "divided" || Mode == "DIVIDED")
918    Parser.Lex();
919  else
920    return Error(L, "unrecognized syntax mode in .syntax directive");
921
922  if (getLexer().isNot(AsmToken::EndOfStatement))
923    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
924  Parser.Lex();
925
926  // TODO tell the MC streamer the mode
927  // getParser().getStreamer().Emit???();
928  return false;
929}
930
931/// ParseDirectiveCode
932///  ::= .code 16 | 32
933bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
934  const AsmToken &Tok = Parser.getTok();
935  if (Tok.isNot(AsmToken::Integer))
936    return Error(L, "unexpected token in .code directive");
937  int64_t Val = Parser.getTok().getIntVal();
938  if (Val == 16)
939    Parser.Lex();
940  else if (Val == 32)
941    Parser.Lex();
942  else
943    return Error(L, "invalid operand to .code directive");
944
945  if (getLexer().isNot(AsmToken::EndOfStatement))
946    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
947  Parser.Lex();
948
949  // TODO tell the MC streamer the mode
950  // getParser().getStreamer().Emit???();
951  return false;
952}
953
954extern "C" void LLVMInitializeARMAsmLexer();
955
956/// Force static initialization.
957extern "C" void LLVMInitializeARMAsmParser() {
958  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
959  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
960  LLVMInitializeARMAsmLexer();
961}
962
963#define GET_REGISTER_MATCHER
964#define GET_MATCHER_IMPLEMENTATION
965#include "ARMGenAsmMatcher.inc"
966