ARMAsmParser.cpp revision 74a33b27463eefdf57e65ac2f7a60dad2b3fe4f7
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "ARMSubtarget.h"
12#include "llvm/MC/MCParser/MCAsmLexer.h"
13#include "llvm/MC/MCParser/MCAsmParser.h"
14#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
15#include "llvm/MC/MCStreamer.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/Target/TargetRegistry.h"
19#include "llvm/Target/TargetAsmParser.h"
20#include "llvm/Support/SourceMgr.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSwitch.h"
24#include "llvm/ADT/Twine.h"
25using namespace llvm;
26
/// ShiftType - The kinds of shift that can be applied to the offset register
/// of an ARM memory operand (register controlled shifts in memory addressing).
/// The enumerators are named after the assembly mnemonics accepted by the
/// shift parser.
enum ShiftType {
  Lsl = 0, // "lsl"
  Lsr = 1, // "lsr"
  Asr = 2, // "asr"
  Ror = 3, // "ror"
  Rrx = 4  // "rrx"; the only shift written without a shift amount
};
35
36namespace {
37  struct ARMOperand;
38
39class ARMAsmParser : public TargetAsmParser {
40  MCAsmParser &Parser;
41  TargetMachine &TM;
42
43private:
44  MCAsmParser &getParser() const { return Parser; }
45
46  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
47
48  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
49
50  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
51
52  int TryParseRegister();
53  ARMOperand *TryParseRegisterWithWriteBack();
54  ARMOperand *ParseRegisterList();
55  ARMOperand *ParseMemory();
56
57  bool ParseMemoryOffsetReg(bool &Negative,
58                            bool &OffsetRegShifted,
59                            enum ShiftType &ShiftType,
60                            const MCExpr *&ShiftAmount,
61                            const MCExpr *&Offset,
62                            bool &OffsetIsReg,
63                            int &OffsetRegNum,
64                            SMLoc &E);
65
66  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
67
68  ARMOperand *ParseOperand();
69
70  bool ParseDirectiveWord(unsigned Size, SMLoc L);
71
72  bool ParseDirectiveThumb(SMLoc L);
73
74  bool ParseDirectiveThumbFunc(SMLoc L);
75
76  bool ParseDirectiveCode(SMLoc L);
77
78  bool ParseDirectiveSyntax(SMLoc L);
79
80  bool MatchAndEmitInstruction(SMLoc IDLoc,
81                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
82                               MCStreamer &Out);
83
84  /// @name Auto-generated Match Functions
85  /// {
86
87#define GET_ASSEMBLER_HEADER
88#include "ARMGenAsmMatcher.inc"
89
90  /// }
91
92
93public:
94  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
95    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
96
97  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
98                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
99
100  virtual bool ParseDirective(AsmToken DirectiveID);
101};
102} // end anonymous namespace
103
104namespace {
105
106/// ARMOperand - Instances of this class represent a parsed ARM machine
107/// instruction.
108struct ARMOperand : public MCParsedAsmOperand {
109public:
110  enum KindTy {
111    CondCode,
112    Immediate,
113    Memory,
114    Register,
115    Token
116  } Kind;
117
118  SMLoc StartLoc, EndLoc;
119
120  union {
121    struct {
122      ARMCC::CondCodes Val;
123    } CC;
124
125    struct {
126      const char *Data;
127      unsigned Length;
128    } Tok;
129
130    struct {
131      unsigned RegNum;
132      bool Writeback;
133    } Reg;
134
135    struct {
136      const MCExpr *Val;
137    } Imm;
138
139    // This is for all forms of ARM address expressions
140    struct {
141      unsigned BaseRegNum;
142      unsigned OffsetRegNum; // used when OffsetIsReg is true
143      const MCExpr *Offset; // used when OffsetIsReg is false
144      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
145      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
146      unsigned
147        OffsetRegShifted : 1, // only used when OffsetIsReg is true
148        Preindexed : 1,
149        Postindexed : 1,
150        OffsetIsReg : 1,
151        Negative : 1, // only used when OffsetIsReg is true
152        Writeback : 1;
153    } Mem;
154
155  };
156
157  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
158    Kind = o.Kind;
159    StartLoc = o.StartLoc;
160    EndLoc = o.EndLoc;
161    switch (Kind) {
162    case CondCode:
163      CC = o.CC;
164      break;
165    case Token:
166      Tok = o.Tok;
167      break;
168    case Register:
169      Reg = o.Reg;
170      break;
171    case Immediate:
172      Imm = o.Imm;
173      break;
174    case Memory:
175      Mem = o.Mem;
176      break;
177    }
178  }
179
180  /// getStartLoc - Get the location of the first token of this operand.
181  SMLoc getStartLoc() const { return StartLoc; }
182  /// getEndLoc - Get the location of the last token of this operand.
183  SMLoc getEndLoc() const { return EndLoc; }
184
185  ARMCC::CondCodes getCondCode() const {
186    assert(Kind == CondCode && "Invalid access!");
187    return CC.Val;
188  }
189
190  StringRef getToken() const {
191    assert(Kind == Token && "Invalid access!");
192    return StringRef(Tok.Data, Tok.Length);
193  }
194
195  unsigned getReg() const {
196    assert(Kind == Register && "Invalid access!");
197    return Reg.RegNum;
198  }
199
200  const MCExpr *getImm() const {
201    assert(Kind == Immediate && "Invalid access!");
202    return Imm.Val;
203  }
204
205  bool isCondCode() const { return Kind == CondCode; }
206  bool isImm() const { return Kind == Immediate; }
207  bool isReg() const { return Kind == Register; }
208  bool isToken() const { return Kind == Token; }
209  bool isMemory() const { return Kind == Memory; }
210
211  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
212    // Add as immediates when possible.  Null MCExpr = 0.
213    if (Expr == 0)
214      Inst.addOperand(MCOperand::CreateImm(0));
215    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
216      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
217    else
218      Inst.addOperand(MCOperand::CreateExpr(Expr));
219  }
220
221  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
222    assert(N == 2 && "Invalid number of operands!");
223    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
224    // FIXME: What belongs here?
225    Inst.addOperand(MCOperand::CreateReg(0));
226  }
227
228  void addRegOperands(MCInst &Inst, unsigned N) const {
229    assert(N == 1 && "Invalid number of operands!");
230    Inst.addOperand(MCOperand::CreateReg(getReg()));
231  }
232
233  void addImmOperands(MCInst &Inst, unsigned N) const {
234    assert(N == 1 && "Invalid number of operands!");
235    addExpr(Inst, getImm());
236  }
237
238
239  bool isMemMode5() const {
240    if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted ||
241        Mem.Writeback || Mem.Negative)
242      return false;
243    // If there is an offset expression, make sure it's valid.
244    if (!Mem.Offset)
245      return true;
246    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
247    if (!CE)
248      return false;
249    // The offset must be a multiple of 4 in the range 0-1020.
250    int64_t Value = CE->getValue();
251    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
252  }
253
254  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
255    assert(N == 2 && isMemMode5() && "Invalid number of operands!");
256
257    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
258    assert(!Mem.OffsetIsReg && "invalid mode 5 operand");
259    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
260    // the difference?
261    if (Mem.Offset) {
262      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
263      assert (CE && "non-constant mode 5 offset operand!");
264      // The MCInst offset operand doesn't include the low two bits (like
265      // the instruction encoding).
266      Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
267    } else
268      Inst.addOperand(MCOperand::CreateImm(0));
269  }
270
271  virtual void dump(raw_ostream &OS) const;
272
273  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
274    ARMOperand *Op = new ARMOperand(CondCode);
275    Op->CC.Val = CC;
276    Op->StartLoc = S;
277    Op->EndLoc = S;
278    return Op;
279  }
280
281  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
282    ARMOperand *Op = new ARMOperand(Token);
283    Op->Tok.Data = Str.data();
284    Op->Tok.Length = Str.size();
285    Op->StartLoc = S;
286    Op->EndLoc = S;
287    return Op;
288  }
289
290  static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S,
291                               SMLoc E) {
292    ARMOperand *Op = new ARMOperand(Register);
293    Op->Reg.RegNum = RegNum;
294    Op->Reg.Writeback = Writeback;
295    Op->StartLoc = S;
296    Op->EndLoc = E;
297    return Op;
298  }
299
300  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
301    ARMOperand *Op = new ARMOperand(Immediate);
302    Op->Imm.Val = Val;
303    Op->StartLoc = S;
304    Op->EndLoc = E;
305    return Op;
306  }
307
308  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
309                               const MCExpr *Offset, unsigned OffsetRegNum,
310                               bool OffsetRegShifted, enum ShiftType ShiftType,
311                               const MCExpr *ShiftAmount, bool Preindexed,
312                               bool Postindexed, bool Negative, bool Writeback,
313                               SMLoc S, SMLoc E) {
314    ARMOperand *Op = new ARMOperand(Memory);
315    Op->Mem.BaseRegNum = BaseRegNum;
316    Op->Mem.OffsetIsReg = OffsetIsReg;
317    Op->Mem.Offset = Offset;
318    Op->Mem.OffsetRegNum = OffsetRegNum;
319    Op->Mem.OffsetRegShifted = OffsetRegShifted;
320    Op->Mem.ShiftType = ShiftType;
321    Op->Mem.ShiftAmount = ShiftAmount;
322    Op->Mem.Preindexed = Preindexed;
323    Op->Mem.Postindexed = Postindexed;
324    Op->Mem.Negative = Negative;
325    Op->Mem.Writeback = Writeback;
326
327    Op->StartLoc = S;
328    Op->EndLoc = E;
329    return Op;
330  }
331
332private:
333  ARMOperand(KindTy K) : Kind(K) {}
334};
335
336} // end anonymous namespace.
337
338void ARMOperand::dump(raw_ostream &OS) const {
339  switch (Kind) {
340  case CondCode:
341    OS << ARMCondCodeToString(getCondCode());
342    break;
343  case Immediate:
344    getImm()->print(OS);
345    break;
346  case Memory:
347    OS << "<memory>";
348    break;
349  case Register:
350    OS << "<register " << getReg() << ">";
351    break;
352  case Token:
353    OS << "'" << getToken() << "'";
354    break;
355  }
356}
357
358/// @name Auto-generated Match Functions
359/// {
360
361static unsigned MatchRegisterName(StringRef Name);
362
363/// }
364
365/// Try to parse a register name.  The token must be an Identifier when called,
366/// and if it is a register name the token is eaten and the register number is
367/// returned.  Otherwise return -1.
368///
369int ARMAsmParser::TryParseRegister() {
370  const AsmToken &Tok = Parser.getTok();
371  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
372
373  // FIXME: Validate register for the current architecture; we have to do
374  // validation later, so maybe there is no need for this here.
375  int RegNum = MatchRegisterName(Tok.getString());
376  if (RegNum == -1)
377    return -1;
378  Parser.Lex(); // Eat identifier token.
379  return RegNum;
380}
381
382
383/// Try to parse a register name.  The token must be an Identifier when called,
384/// and if it is a register name the token is eaten and the register number is
385/// returned.  Otherwise return -1.
386///
387/// TODO this is likely to change to allow different register types and or to
388/// parse for a specific register type.
389ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() {
390  SMLoc S = Parser.getTok().getLoc();
391  int RegNo = TryParseRegister();
392  if (RegNo == -1) return 0;
393
394  SMLoc E = Parser.getTok().getLoc();
395
396  bool Writeback = false;
397  const AsmToken &ExclaimTok = Parser.getTok();
398  if (ExclaimTok.is(AsmToken::Exclaim)) {
399    E = ExclaimTok.getLoc();
400    Writeback = true;
401    Parser.Lex(); // Eat exclaim token
402  }
403
404  return ARMOperand::CreateReg(RegNo, Writeback, S, E);
405}
406
407/// Parse a register list, return it if successful else return null.  The first
408/// token must be a '{' when called.
409ARMOperand *ARMAsmParser::ParseRegisterList() {
410  SMLoc S, E;
411  assert(Parser.getTok().is(AsmToken::LCurly) &&
412         "Token is not an Left Curly Brace");
413  S = Parser.getTok().getLoc();
414  Parser.Lex(); // Eat left curly brace token.
415
416  const AsmToken &RegTok = Parser.getTok();
417  SMLoc RegLoc = RegTok.getLoc();
418  if (RegTok.isNot(AsmToken::Identifier)) {
419    Error(RegLoc, "register expected");
420    return 0;
421  }
422  int RegNum = MatchRegisterName(RegTok.getString());
423  if (RegNum == -1) {
424    Error(RegLoc, "register expected");
425    return 0;
426  }
427
428  Parser.Lex(); // Eat identifier token.
429  unsigned RegList = 1 << RegNum;
430
431  int HighRegNum = RegNum;
432  // TODO ranges like "{Rn-Rm}"
433  while (Parser.getTok().is(AsmToken::Comma)) {
434    Parser.Lex(); // Eat comma token.
435
436    const AsmToken &RegTok = Parser.getTok();
437    SMLoc RegLoc = RegTok.getLoc();
438    if (RegTok.isNot(AsmToken::Identifier)) {
439      Error(RegLoc, "register expected");
440      return 0;
441    }
442    int RegNum = MatchRegisterName(RegTok.getString());
443    if (RegNum == -1) {
444      Error(RegLoc, "register expected");
445      return 0;
446    }
447
448    if (RegList & (1 << RegNum))
449      Warning(RegLoc, "register duplicated in register list");
450    else if (RegNum <= HighRegNum)
451      Warning(RegLoc, "register not in ascending order in register list");
452    RegList |= 1 << RegNum;
453    HighRegNum = RegNum;
454
455    Parser.Lex(); // Eat identifier token.
456  }
457  const AsmToken &RCurlyTok = Parser.getTok();
458  if (RCurlyTok.isNot(AsmToken::RCurly)) {
459    Error(RCurlyTok.getLoc(), "'}' expected");
460    return 0;
461  }
462  E = RCurlyTok.getLoc();
463  Parser.Lex(); // Eat left curly brace token.
464
465  // FIXME: Need to return an operand!
466  Error(E, "FIXME: register list parsing not implemented");
467  return 0;
468}
469
470/// Parse an arm memory expression, return false if successful else return true
471/// or an error.  The first token must be a '[' when called.
472/// TODO Only preindexing and postindexing addressing are started, unindexed
473/// with option, etc are still to do.
474ARMOperand *ARMAsmParser::ParseMemory() {
475  SMLoc S, E;
476  assert(Parser.getTok().is(AsmToken::LBrac) &&
477         "Token is not an Left Bracket");
478  S = Parser.getTok().getLoc();
479  Parser.Lex(); // Eat left bracket token.
480
481  const AsmToken &BaseRegTok = Parser.getTok();
482  if (BaseRegTok.isNot(AsmToken::Identifier)) {
483    Error(BaseRegTok.getLoc(), "register expected");
484    return 0;
485  }
486  int BaseRegNum = TryParseRegister();
487  if (BaseRegNum == -1) {
488    Error(BaseRegTok.getLoc(), "register expected");
489    return 0;
490  }
491
492  bool Preindexed = false;
493  bool Postindexed = false;
494  bool OffsetIsReg = false;
495  bool Negative = false;
496  bool Writeback = false;
497
498  // First look for preindexed address forms, that is after the "[Rn" we now
499  // have to see if the next token is a comma.
500  const AsmToken &Tok = Parser.getTok();
501  if (Tok.is(AsmToken::Comma)) {
502    Preindexed = true;
503    Parser.Lex(); // Eat comma token.
504    int OffsetRegNum;
505    bool OffsetRegShifted;
506    enum ShiftType ShiftType;
507    const MCExpr *ShiftAmount;
508    const MCExpr *Offset;
509    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
510                             Offset, OffsetIsReg, OffsetRegNum, E))
511      return 0;
512    const AsmToken &RBracTok = Parser.getTok();
513    if (RBracTok.isNot(AsmToken::RBrac)) {
514      Error(RBracTok.getLoc(), "']' expected");
515      return 0;
516    }
517    E = RBracTok.getLoc();
518    Parser.Lex(); // Eat right bracket token.
519
520    const AsmToken &ExclaimTok = Parser.getTok();
521    if (ExclaimTok.is(AsmToken::Exclaim)) {
522      E = ExclaimTok.getLoc();
523      Writeback = true;
524      Parser.Lex(); // Eat exclaim token
525    }
526    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
527                                 OffsetRegShifted, ShiftType, ShiftAmount,
528                                 Preindexed, Postindexed, Negative, Writeback,
529                                 S, E);
530  }
531  // The "[Rn" we have so far was not followed by a comma.
532  else if (Tok.is(AsmToken::RBrac)) {
533    // If there's anything other than the right brace, this is a post indexing
534    // addressing form.
535    E = Tok.getLoc();
536    Parser.Lex(); // Eat right bracket token.
537
538    int OffsetRegNum = 0;
539    bool OffsetRegShifted = false;
540    enum ShiftType ShiftType;
541    const MCExpr *ShiftAmount;
542    const MCExpr *Offset = 0;
543
544    const AsmToken &NextTok = Parser.getTok();
545    if (NextTok.isNot(AsmToken::EndOfStatement)) {
546      Postindexed = true;
547      Writeback = true;
548      if (NextTok.isNot(AsmToken::Comma)) {
549        Error(NextTok.getLoc(), "',' expected");
550        return 0;
551      }
552      Parser.Lex(); // Eat comma token.
553      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
554                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
555                               E))
556        return 0;
557    }
558
559    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
560                                 OffsetRegShifted, ShiftType, ShiftAmount,
561                                 Preindexed, Postindexed, Negative, Writeback,
562                                 S, E);
563  }
564
565  return 0;
566}
567
568/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
569/// we will parse the following (were +/- means that a plus or minus is
570/// optional):
571///   +/-Rm
572///   +/-Rm, shift
573///   #offset
574/// we return false on success or an error otherwise.
575bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
576                                        bool &OffsetRegShifted,
577                                        enum ShiftType &ShiftType,
578                                        const MCExpr *&ShiftAmount,
579                                        const MCExpr *&Offset,
580                                        bool &OffsetIsReg,
581                                        int &OffsetRegNum,
582                                        SMLoc &E) {
583  Negative = false;
584  OffsetRegShifted = false;
585  OffsetIsReg = false;
586  OffsetRegNum = -1;
587  const AsmToken &NextTok = Parser.getTok();
588  E = NextTok.getLoc();
589  if (NextTok.is(AsmToken::Plus))
590    Parser.Lex(); // Eat plus token.
591  else if (NextTok.is(AsmToken::Minus)) {
592    Negative = true;
593    Parser.Lex(); // Eat minus token
594  }
595  // See if there is a register following the "[Rn," or "[Rn]," we have so far.
596  const AsmToken &OffsetRegTok = Parser.getTok();
597  if (OffsetRegTok.is(AsmToken::Identifier)) {
598    SMLoc CurLoc = OffsetRegTok.getLoc();
599    OffsetRegNum = TryParseRegister();
600    if (OffsetRegNum != -1) {
601      OffsetIsReg = true;
602      E = CurLoc;
603    }
604  }
605
606  // If we parsed a register as the offset then their can be a shift after that
607  if (OffsetRegNum != -1) {
608    // Look for a comma then a shift
609    const AsmToken &Tok = Parser.getTok();
610    if (Tok.is(AsmToken::Comma)) {
611      Parser.Lex(); // Eat comma token.
612
613      const AsmToken &Tok = Parser.getTok();
614      if (ParseShift(ShiftType, ShiftAmount, E))
615        return Error(Tok.getLoc(), "shift expected");
616      OffsetRegShifted = true;
617    }
618  }
619  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
620    // Look for #offset following the "[Rn," or "[Rn],"
621    const AsmToken &HashTok = Parser.getTok();
622    if (HashTok.isNot(AsmToken::Hash))
623      return Error(HashTok.getLoc(), "'#' expected");
624
625    Parser.Lex(); // Eat hash token.
626
627    if (getParser().ParseExpression(Offset))
628     return true;
629    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
630  }
631  return false;
632}
633
634/// ParseShift as one of these two:
635///   ( lsl | lsr | asr | ror ) , # shift_amount
636///   rrx
637/// and returns true if it parses a shift otherwise it returns false.
638bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
639                              SMLoc &E) {
640  const AsmToken &Tok = Parser.getTok();
641  if (Tok.isNot(AsmToken::Identifier))
642    return true;
643  StringRef ShiftName = Tok.getString();
644  if (ShiftName == "lsl" || ShiftName == "LSL")
645    St = Lsl;
646  else if (ShiftName == "lsr" || ShiftName == "LSR")
647    St = Lsr;
648  else if (ShiftName == "asr" || ShiftName == "ASR")
649    St = Asr;
650  else if (ShiftName == "ror" || ShiftName == "ROR")
651    St = Ror;
652  else if (ShiftName == "rrx" || ShiftName == "RRX")
653    St = Rrx;
654  else
655    return true;
656  Parser.Lex(); // Eat shift type token.
657
658  // Rrx stands alone.
659  if (St == Rrx)
660    return false;
661
662  // Otherwise, there must be a '#' and a shift amount.
663  const AsmToken &HashTok = Parser.getTok();
664  if (HashTok.isNot(AsmToken::Hash))
665    return Error(HashTok.getLoc(), "'#' expected");
666  Parser.Lex(); // Eat hash token.
667
668  if (getParser().ParseExpression(ShiftAmount))
669    return true;
670
671  return false;
672}
673
674/// Parse a arm instruction operand.  For now this parses the operand regardless
675/// of the mnemonic.
676ARMOperand *ARMAsmParser::ParseOperand() {
677  SMLoc S, E;
678
679  switch (getLexer().getKind()) {
680  case AsmToken::Identifier:
681    if (ARMOperand *Op = TryParseRegisterWithWriteBack())
682      return Op;
683
684    // This was not a register so parse other operands that start with an
685    // identifier (like labels) as expressions and create them as immediates.
686    const MCExpr *IdVal;
687    S = Parser.getTok().getLoc();
688    if (getParser().ParseExpression(IdVal))
689      return 0;
690    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
691    return ARMOperand::CreateImm(IdVal, S, E);
692  case AsmToken::LBrac:
693    return ParseMemory();
694  case AsmToken::LCurly:
695    return ParseRegisterList();
696  case AsmToken::Hash:
697    // #42 -> immediate.
698    // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
699    S = Parser.getTok().getLoc();
700    Parser.Lex();
701    const MCExpr *ImmVal;
702    if (getParser().ParseExpression(ImmVal))
703      return 0;
704    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
705    return ARMOperand::CreateImm(ImmVal, S, E);
706  default:
707    Error(Parser.getTok().getLoc(), "unexpected token in operand");
708    return 0;
709  }
710}
711
712/// Parse an arm instruction mnemonic followed by its operands.
713bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
714                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
715  // Create the leading tokens for the mnemonic, split by '.' characters.
716  size_t Start = 0, Next = Name.find('.');
717  StringRef Head = Name.slice(Start, Next);
718
719  // Determine the predicate, if any.
720  //
721  // FIXME: We need a way to check whether a prefix supports predication,
722  // otherwise we will end up with an ambiguity for instructions that happen to
723  // end with a predicate name.
724  // FIXME: Likewise, some arithmetic instructions have an 's' prefix which
725  // indicates to update the condition codes. Those instructions have an
726  // additional immediate operand which encodes the prefix as reg0 or CPSR.
727  // Just checking for a suffix of 's' definitely creates ambiguities; e.g,
728  // the SMMLS instruction.
729  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
730    .Case("eq", ARMCC::EQ)
731    .Case("ne", ARMCC::NE)
732    .Case("hs", ARMCC::HS)
733    .Case("lo", ARMCC::LO)
734    .Case("mi", ARMCC::MI)
735    .Case("pl", ARMCC::PL)
736    .Case("vs", ARMCC::VS)
737    .Case("vc", ARMCC::VC)
738    .Case("hi", ARMCC::HI)
739    .Case("ls", ARMCC::LS)
740    .Case("ge", ARMCC::GE)
741    .Case("lt", ARMCC::LT)
742    .Case("gt", ARMCC::GT)
743    .Case("le", ARMCC::LE)
744    .Case("al", ARMCC::AL)
745    .Default(~0U);
746
747  if (CC != ~0U) {
748    if (CC == ARMCC::LS &&
749        (Head.compare("vmls") == 0 || Head.compare("vnmls") == 0)) {
750      CC = ARMCC::AL;
751    } else {
752      Head = Head.slice(0, Head.size() - 2);
753    }
754  } else {
755    CC = ARMCC::AL;
756  }
757
758  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
759  Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc));
760
761  // Add the remaining tokens in the mnemonic.
762  while (Next != StringRef::npos) {
763    Start = Next;
764    Next = Name.find('.', Start + 1);
765    Head = Name.slice(Start, Next);
766
767    Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
768  }
769
770  // Read the remaining operands.
771  if (getLexer().isNot(AsmToken::EndOfStatement)) {
772    // Read the first operand.
773    if (ARMOperand *Op = ParseOperand())
774      Operands.push_back(Op);
775    else {
776      Parser.EatToEndOfStatement();
777      return true;
778    }
779
780    while (getLexer().is(AsmToken::Comma)) {
781      Parser.Lex();  // Eat the comma.
782
783      // Parse and remember the operand.
784      if (ARMOperand *Op = ParseOperand())
785        Operands.push_back(Op);
786      else {
787        Parser.EatToEndOfStatement();
788        return true;
789      }
790    }
791  }
792
793  if (getLexer().isNot(AsmToken::EndOfStatement)) {
794    Parser.EatToEndOfStatement();
795    return TokError("unexpected token in argument list");
796  }
797  Parser.Lex(); // Consume the EndOfStatement
798  return false;
799}
800
801bool ARMAsmParser::
802MatchAndEmitInstruction(SMLoc IDLoc,
803                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
804                        MCStreamer &Out) {
805  MCInst Inst;
806  unsigned ErrorInfo;
807  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
808  case Match_Success:
809    Out.EmitInstruction(Inst);
810    return false;
811
812  case Match_MissingFeature:
813    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
814    return true;
815  case Match_InvalidOperand: {
816    SMLoc ErrorLoc = IDLoc;
817    if (ErrorInfo != ~0U) {
818      if (ErrorInfo >= Operands.size())
819        return Error(IDLoc, "too few operands for instruction");
820
821      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
822      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
823    }
824
825    return Error(ErrorLoc, "invalid operand for instruction");
826  }
827  case Match_MnemonicFail:
828    return Error(IDLoc, "unrecognized instruction mnemonic");
829  }
830
831  llvm_unreachable("Implement any new match types added!");
832}
833
834
835
836/// ParseDirective parses the arm specific directives
837bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
838  StringRef IDVal = DirectiveID.getIdentifier();
839  if (IDVal == ".word")
840    return ParseDirectiveWord(4, DirectiveID.getLoc());
841  else if (IDVal == ".thumb")
842    return ParseDirectiveThumb(DirectiveID.getLoc());
843  else if (IDVal == ".thumb_func")
844    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
845  else if (IDVal == ".code")
846    return ParseDirectiveCode(DirectiveID.getLoc());
847  else if (IDVal == ".syntax")
848    return ParseDirectiveSyntax(DirectiveID.getLoc());
849  return true;
850}
851
852/// ParseDirectiveWord
853///  ::= .word [ expression (, expression)* ]
854bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
855  if (getLexer().isNot(AsmToken::EndOfStatement)) {
856    for (;;) {
857      const MCExpr *Value;
858      if (getParser().ParseExpression(Value))
859        return true;
860
861      getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
862
863      if (getLexer().is(AsmToken::EndOfStatement))
864        break;
865
866      // FIXME: Improve diagnostic.
867      if (getLexer().isNot(AsmToken::Comma))
868        return Error(L, "unexpected token in directive");
869      Parser.Lex();
870    }
871  }
872
873  Parser.Lex();
874  return false;
875}
876
877/// ParseDirectiveThumb
878///  ::= .thumb
879bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
880  if (getLexer().isNot(AsmToken::EndOfStatement))
881    return Error(L, "unexpected token in directive");
882  Parser.Lex();
883
884  // TODO: set thumb mode
885  // TODO: tell the MC streamer the mode
886  // getParser().getStreamer().Emit???();
887  return false;
888}
889
890/// ParseDirectiveThumbFunc
891///  ::= .thumbfunc symbol_name
892bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
893  const AsmToken &Tok = Parser.getTok();
894  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
895    return Error(L, "unexpected token in .syntax directive");
896  Parser.Lex(); // Consume the identifier token.
897
898  if (getLexer().isNot(AsmToken::EndOfStatement))
899    return Error(L, "unexpected token in directive");
900  Parser.Lex();
901
902  // TODO: mark symbol as a thumb symbol
903  // getParser().getStreamer().Emit???();
904  return false;
905}
906
907/// ParseDirectiveSyntax
908///  ::= .syntax unified | divided
909bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
910  const AsmToken &Tok = Parser.getTok();
911  if (Tok.isNot(AsmToken::Identifier))
912    return Error(L, "unexpected token in .syntax directive");
913  StringRef Mode = Tok.getString();
914  if (Mode == "unified" || Mode == "UNIFIED")
915    Parser.Lex();
916  else if (Mode == "divided" || Mode == "DIVIDED")
917    Parser.Lex();
918  else
919    return Error(L, "unrecognized syntax mode in .syntax directive");
920
921  if (getLexer().isNot(AsmToken::EndOfStatement))
922    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
923  Parser.Lex();
924
925  // TODO tell the MC streamer the mode
926  // getParser().getStreamer().Emit???();
927  return false;
928}
929
930/// ParseDirectiveCode
931///  ::= .code 16 | 32
932bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
933  const AsmToken &Tok = Parser.getTok();
934  if (Tok.isNot(AsmToken::Integer))
935    return Error(L, "unexpected token in .code directive");
936  int64_t Val = Parser.getTok().getIntVal();
937  if (Val == 16)
938    Parser.Lex();
939  else if (Val == 32)
940    Parser.Lex();
941  else
942    return Error(L, "invalid operand to .code directive");
943
944  if (getLexer().isNot(AsmToken::EndOfStatement))
945    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
946  Parser.Lex();
947
948  // TODO tell the MC streamer the mode
949  // getParser().getStreamer().Emit???();
950  return false;
951}
952
953extern "C" void LLVMInitializeARMAsmLexer();
954
955/// Force static initialization.
956extern "C" void LLVMInitializeARMAsmParser() {
957  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
958  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
959  LLVMInitializeARMAsmLexer();
960}
961
962#define GET_REGISTER_MATCHER
963#define GET_MATCHER_IMPLEMENTATION
964#include "ARMGenAsmMatcher.inc"
965