ARMAsmParser.cpp revision 8d5acb7007decaf0c30bf4a3d4c55e5cc2cce0a7
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "ARMAddressingModes.h"
12#include "ARMSubtarget.h"
13#include "llvm/MC/MCParser/MCAsmLexer.h"
14#include "llvm/MC/MCParser/MCAsmParser.h"
15#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
16#include "llvm/MC/MCContext.h"
17#include "llvm/MC/MCStreamer.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/Target/TargetRegistry.h"
21#include "llvm/Target/TargetAsmParser.h"
22#include "llvm/Support/SourceMgr.h"
23#include "llvm/Support/raw_ostream.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringSwitch.h"
26#include "llvm/ADT/Twine.h"
27using namespace llvm;
28
/// ShiftType - The kinds of shift that may be applied to the offset register
/// of an ARM memory addressing expression.
enum ShiftType {
  Lsl = 0, // spelled "lsl" in assembly
  Lsr = 1, // spelled "lsr" in assembly
  Asr = 2, // spelled "asr" in assembly
  Ror = 3, // spelled "ror" in assembly
  Rrx = 4  // spelled "rrx" in assembly
};
37
38namespace {
39  struct ARMOperand;
40
41class ARMAsmParser : public TargetAsmParser {
42  MCAsmParser &Parser;
43  TargetMachine &TM;
44
45  MCAsmParser &getParser() const { return Parser; }
46
47  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
48
49  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
50
51  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
52
53  int TryParseRegister();
54  ARMOperand *TryParseRegisterWithWriteBack();
55  ARMOperand *ParseRegisterList();
56  ARMOperand *ParseMemory();
57
58  bool ParseMemoryOffsetReg(bool &Negative,
59                            bool &OffsetRegShifted,
60                            enum ShiftType &ShiftType,
61                            const MCExpr *&ShiftAmount,
62                            const MCExpr *&Offset,
63                            bool &OffsetIsReg,
64                            int &OffsetRegNum,
65                            SMLoc &E);
66
67  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
68
69  ARMOperand *ParseOperand();
70
71  bool ParseDirectiveWord(unsigned Size, SMLoc L);
72
73  bool ParseDirectiveThumb(SMLoc L);
74
75  bool ParseDirectiveThumbFunc(SMLoc L);
76
77  bool ParseDirectiveCode(SMLoc L);
78
79  bool ParseDirectiveSyntax(SMLoc L);
80
81  bool MatchAndEmitInstruction(SMLoc IDLoc,
82                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
83                               MCStreamer &Out);
84
85  /// @name Auto-generated Match Functions
86  /// {
87
88#define GET_ASSEMBLER_HEADER
89#include "ARMGenAsmMatcher.inc"
90
91  /// }
92
93
94public:
95  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
96    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
97      // Initialize the set of available features.
98      setAvailableFeatures(ComputeAvailableFeatures(
99          &TM.getSubtarget<ARMSubtarget>()));
100    }
101
102  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
103                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
104
105  virtual bool ParseDirective(AsmToken DirectiveID);
106};
107} // end anonymous namespace
108
109namespace {
110
111/// ARMOperand - Instances of this class represent a parsed ARM machine
112/// instruction.
113struct ARMOperand : public MCParsedAsmOperand {
114public:
115  enum KindTy {
116    CondCode,
117    Immediate,
118    Memory,
119    Register,
120    RegisterList,
121    Token
122  } Kind;
123
124  SMLoc StartLoc, EndLoc;
125
126  union {
127    struct {
128      ARMCC::CondCodes Val;
129    } CC;
130
131    struct {
132      const char *Data;
133      unsigned Length;
134    } Tok;
135
136    struct {
137      unsigned RegNum;
138      bool Writeback;
139    } Reg;
140
141     struct {
142      unsigned RegStart;
143      unsigned Number;
144    } RegList;
145
146    struct {
147      const MCExpr *Val;
148    } Imm;
149
150    // This is for all forms of ARM address expressions
151    struct {
152      unsigned BaseRegNum;
153      unsigned OffsetRegNum; // used when OffsetIsReg is true
154      const MCExpr *Offset; // used when OffsetIsReg is false
155      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
156      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
157      unsigned
158        OffsetRegShifted : 1, // only used when OffsetIsReg is true
159        Preindexed : 1,
160        Postindexed : 1,
161        OffsetIsReg : 1,
162        Negative : 1, // only used when OffsetIsReg is true
163        Writeback : 1;
164    } Mem;
165
166  };
167
168  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
169    Kind = o.Kind;
170    StartLoc = o.StartLoc;
171    EndLoc = o.EndLoc;
172    switch (Kind) {
173    case CondCode:
174      CC = o.CC;
175      break;
176    case Token:
177      Tok = o.Tok;
178      break;
179    case Register:
180      Reg = o.Reg;
181      break;
182    case RegisterList:
183      RegList = o.RegList;
184      break;
185    case Immediate:
186      Imm = o.Imm;
187      break;
188    case Memory:
189      Mem = o.Mem;
190      break;
191    }
192  }
193
194  /// getStartLoc - Get the location of the first token of this operand.
195  SMLoc getStartLoc() const { return StartLoc; }
196  /// getEndLoc - Get the location of the last token of this operand.
197  SMLoc getEndLoc() const { return EndLoc; }
198
199  ARMCC::CondCodes getCondCode() const {
200    assert(Kind == CondCode && "Invalid access!");
201    return CC.Val;
202  }
203
204  StringRef getToken() const {
205    assert(Kind == Token && "Invalid access!");
206    return StringRef(Tok.Data, Tok.Length);
207  }
208
209  unsigned getReg() const {
210    assert(Kind == Register && "Invalid access!");
211    return Reg.RegNum;
212  }
213
214  std::pair<unsigned, unsigned> getRegList() const {
215    assert(Kind == RegisterList && "Invalid access!");
216    return std::make_pair(RegList.RegStart, RegList.Number);
217  }
218
219  const MCExpr *getImm() const {
220    assert(Kind == Immediate && "Invalid access!");
221    return Imm.Val;
222  }
223
224  bool isCondCode() const { return Kind == CondCode; }
225  bool isImm() const { return Kind == Immediate; }
226  bool isReg() const { return Kind == Register; }
227  bool isRegList() const { return Kind == RegisterList; }
228  bool isToken() const { return Kind == Token; }
229  bool isMemory() const { return Kind == Memory; }
230
231  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
232    // Add as immediates when possible.  Null MCExpr = 0.
233    if (Expr == 0)
234      Inst.addOperand(MCOperand::CreateImm(0));
235    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
236      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
237    else
238      Inst.addOperand(MCOperand::CreateExpr(Expr));
239  }
240
241  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
242    assert(N == 2 && "Invalid number of operands!");
243    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
244    // FIXME: What belongs here?
245    Inst.addOperand(MCOperand::CreateReg(0));
246  }
247
248  void addRegOperands(MCInst &Inst, unsigned N) const {
249    assert(N == 1 && "Invalid number of operands!");
250    Inst.addOperand(MCOperand::CreateReg(getReg()));
251  }
252
253  void addImmOperands(MCInst &Inst, unsigned N) const {
254    assert(N == 1 && "Invalid number of operands!");
255    addExpr(Inst, getImm());
256  }
257
258
259  bool isMemMode5() const {
260    if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted ||
261        Mem.Writeback || Mem.Negative)
262      return false;
263    // If there is an offset expression, make sure it's valid.
264    if (!Mem.Offset)
265      return true;
266    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
267    if (!CE)
268      return false;
269    // The offset must be a multiple of 4 in the range 0-1020.
270    int64_t Value = CE->getValue();
271    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
272  }
273
274  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
275    assert(N == 2 && isMemMode5() && "Invalid number of operands!");
276
277    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
278    assert(!Mem.OffsetIsReg && "Invalid mode 5 operand");
279
280    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
281    // the difference?
282    if (Mem.Offset) {
283      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
284      assert(CE && "Non-constant mode 5 offset operand!");
285
286      // The MCInst offset operand doesn't include the low two bits (like
287      // the instruction encoding).
288      int64_t Offset = CE->getValue() / 4;
289      if (Offset >= 0)
290        Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
291                                                               Offset)));
292      else
293        Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
294                                                               -Offset)));
295    } else {
296      Inst.addOperand(MCOperand::CreateImm(0));
297    }
298  }
299
300  virtual void dump(raw_ostream &OS) const;
301
302  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
303    ARMOperand *Op = new ARMOperand(CondCode);
304    Op->CC.Val = CC;
305    Op->StartLoc = S;
306    Op->EndLoc = S;
307    return Op;
308  }
309
310  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
311    ARMOperand *Op = new ARMOperand(Token);
312    Op->Tok.Data = Str.data();
313    Op->Tok.Length = Str.size();
314    Op->StartLoc = S;
315    Op->EndLoc = S;
316    return Op;
317  }
318
319  static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S,
320                               SMLoc E) {
321    ARMOperand *Op = new ARMOperand(Register);
322    Op->Reg.RegNum = RegNum;
323    Op->Reg.Writeback = Writeback;
324    Op->StartLoc = S;
325    Op->EndLoc = E;
326    return Op;
327  }
328
329  static ARMOperand *CreateRegList(unsigned RegStart, unsigned Number,
330                                   SMLoc S, SMLoc E) {
331    ARMOperand *Op = new ARMOperand(RegisterList);
332    Op->RegList.RegStart = RegStart;
333    Op->RegList.Number = Number;
334    Op->StartLoc = S;
335    Op->EndLoc = E;
336    return Op;
337  }
338
339  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
340    ARMOperand *Op = new ARMOperand(Immediate);
341    Op->Imm.Val = Val;
342    Op->StartLoc = S;
343    Op->EndLoc = E;
344    return Op;
345  }
346
347  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
348                               const MCExpr *Offset, unsigned OffsetRegNum,
349                               bool OffsetRegShifted, enum ShiftType ShiftType,
350                               const MCExpr *ShiftAmount, bool Preindexed,
351                               bool Postindexed, bool Negative, bool Writeback,
352                               SMLoc S, SMLoc E) {
353    ARMOperand *Op = new ARMOperand(Memory);
354    Op->Mem.BaseRegNum = BaseRegNum;
355    Op->Mem.OffsetIsReg = OffsetIsReg;
356    Op->Mem.Offset = Offset;
357    Op->Mem.OffsetRegNum = OffsetRegNum;
358    Op->Mem.OffsetRegShifted = OffsetRegShifted;
359    Op->Mem.ShiftType = ShiftType;
360    Op->Mem.ShiftAmount = ShiftAmount;
361    Op->Mem.Preindexed = Preindexed;
362    Op->Mem.Postindexed = Postindexed;
363    Op->Mem.Negative = Negative;
364    Op->Mem.Writeback = Writeback;
365
366    Op->StartLoc = S;
367    Op->EndLoc = E;
368    return Op;
369  }
370
371private:
372  ARMOperand(KindTy K) : Kind(K) {}
373};
374
375} // end anonymous namespace.
376
377void ARMOperand::dump(raw_ostream &OS) const {
378  switch (Kind) {
379  case CondCode:
380    OS << ARMCondCodeToString(getCondCode());
381    break;
382  case Immediate:
383    getImm()->print(OS);
384    break;
385  case Memory:
386    OS << "<memory>";
387    break;
388  case Register:
389    OS << "<register " << getReg() << ">";
390    break;
391  case RegisterList: {
392    OS << "<register_list ";
393    std::pair<unsigned, unsigned> List = getRegList();
394    unsigned RegEnd = List.first + List.second;
395
396    for (unsigned Idx = List.first; Idx < RegEnd; ) {
397      OS << Idx;
398      if (++Idx < RegEnd) OS << ", ";
399    }
400
401    OS << ">";
402    break;
403  }
404  case Token:
405    OS << "'" << getToken() << "'";
406    break;
407  }
408}
409
410/// @name Auto-generated Match Functions
411/// {
412
413static unsigned MatchRegisterName(StringRef Name);
414
415/// }
416
417/// Try to parse a register name.  The token must be an Identifier when called,
418/// and if it is a register name the token is eaten and the register number is
419/// returned.  Otherwise return -1.
420///
421int ARMAsmParser::TryParseRegister() {
422  const AsmToken &Tok = Parser.getTok();
423  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
424
425  // FIXME: Validate register for the current architecture; we have to do
426  // validation later, so maybe there is no need for this here.
427  unsigned RegNum = MatchRegisterName(Tok.getString());
428  if (RegNum == 0)
429    return -1;
430  Parser.Lex(); // Eat identifier token.
431  return RegNum;
432}
433
434
435/// Try to parse a register name.  The token must be an Identifier when called,
436/// and if it is a register name the token is eaten and the register number is
437/// returned.  Otherwise return -1.
438///
439/// TODO this is likely to change to allow different register types and or to
440/// parse for a specific register type.
441ARMOperand *ARMAsmParser::TryParseRegisterWithWriteBack() {
442  SMLoc S = Parser.getTok().getLoc();
443  int RegNo = TryParseRegister();
444  if (RegNo == -1) return 0;
445
446  SMLoc E = Parser.getTok().getLoc();
447
448  bool Writeback = false;
449  const AsmToken &ExclaimTok = Parser.getTok();
450  if (ExclaimTok.is(AsmToken::Exclaim)) {
451    E = ExclaimTok.getLoc();
452    Writeback = true;
453    Parser.Lex(); // Eat exclaim token
454  }
455
456  return ARMOperand::CreateReg(RegNo, Writeback, S, E);
457}
458
459/// Parse a register list, return it if successful else return null.  The first
460/// token must be a '{' when called.
461ARMOperand *ARMAsmParser::ParseRegisterList() {
462  SMLoc S, E;
463  assert(Parser.getTok().is(AsmToken::LCurly) &&
464         "Token is not a Left Curly Brace");
465  S = Parser.getTok().getLoc();
466  Parser.Lex(); // Eat left curly brace token.
467
468  const AsmToken &RegTok = Parser.getTok();
469  SMLoc RegLoc = RegTok.getLoc();
470  if (RegTok.isNot(AsmToken::Identifier)) {
471    Error(RegLoc, "register expected");
472    return 0;
473  }
474  int RegNum = TryParseRegister();
475  if (RegNum == -1) {
476    Error(RegLoc, "register expected");
477    return 0;
478  }
479
480  unsigned RegList = 1 << RegNum;
481
482  int HighRegNum = RegNum;
483  // TODO ranges like "{Rn-Rm}"
484  while (Parser.getTok().is(AsmToken::Comma)) {
485    Parser.Lex(); // Eat comma token.
486
487    const AsmToken &RegTok = Parser.getTok();
488    SMLoc RegLoc = RegTok.getLoc();
489    if (RegTok.isNot(AsmToken::Identifier)) {
490      Error(RegLoc, "register expected");
491      return 0;
492    }
493    int RegNum = TryParseRegister();
494    if (RegNum == -1) {
495      Error(RegLoc, "register expected");
496      return 0;
497    }
498
499    if (RegList & (1 << RegNum))
500      Warning(RegLoc, "register duplicated in register list");
501    else if (RegNum <= HighRegNum)
502      Warning(RegLoc, "register not in ascending order in register list");
503    RegList |= 1 << RegNum;
504    HighRegNum = RegNum;
505  }
506  const AsmToken &RCurlyTok = Parser.getTok();
507  if (RCurlyTok.isNot(AsmToken::RCurly)) {
508    Error(RCurlyTok.getLoc(), "'}' expected");
509    return 0;
510  }
511  E = RCurlyTok.getLoc();
512  Parser.Lex(); // Eat left curly brace token.
513
514  // FIXME: Need to return an operand!
515  Error(E, "FIXME: register list parsing not implemented");
516  return 0;
517}
518
519/// Parse an arm memory expression, return false if successful else return true
520/// or an error.  The first token must be a '[' when called.
521/// TODO Only preindexing and postindexing addressing are started, unindexed
522/// with option, etc are still to do.
523ARMOperand *ARMAsmParser::ParseMemory() {
524  SMLoc S, E;
525  assert(Parser.getTok().is(AsmToken::LBrac) &&
526         "Token is not a Left Bracket");
527  S = Parser.getTok().getLoc();
528  Parser.Lex(); // Eat left bracket token.
529
530  const AsmToken &BaseRegTok = Parser.getTok();
531  if (BaseRegTok.isNot(AsmToken::Identifier)) {
532    Error(BaseRegTok.getLoc(), "register expected");
533    return 0;
534  }
535  int BaseRegNum = TryParseRegister();
536  if (BaseRegNum == -1) {
537    Error(BaseRegTok.getLoc(), "register expected");
538    return 0;
539  }
540
541  bool Preindexed = false;
542  bool Postindexed = false;
543  bool OffsetIsReg = false;
544  bool Negative = false;
545  bool Writeback = false;
546
547  // First look for preindexed address forms, that is after the "[Rn" we now
548  // have to see if the next token is a comma.
549  const AsmToken &Tok = Parser.getTok();
550  if (Tok.is(AsmToken::Comma)) {
551    Preindexed = true;
552    Parser.Lex(); // Eat comma token.
553    int OffsetRegNum;
554    bool OffsetRegShifted;
555    enum ShiftType ShiftType;
556    const MCExpr *ShiftAmount;
557    const MCExpr *Offset;
558    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
559                             Offset, OffsetIsReg, OffsetRegNum, E))
560      return 0;
561    const AsmToken &RBracTok = Parser.getTok();
562    if (RBracTok.isNot(AsmToken::RBrac)) {
563      Error(RBracTok.getLoc(), "']' expected");
564      return 0;
565    }
566    E = RBracTok.getLoc();
567    Parser.Lex(); // Eat right bracket token.
568
569    const AsmToken &ExclaimTok = Parser.getTok();
570    if (ExclaimTok.is(AsmToken::Exclaim)) {
571      E = ExclaimTok.getLoc();
572      Writeback = true;
573      Parser.Lex(); // Eat exclaim token
574    }
575    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
576                                 OffsetRegShifted, ShiftType, ShiftAmount,
577                                 Preindexed, Postindexed, Negative, Writeback,
578                                 S, E);
579  }
580  // The "[Rn" we have so far was not followed by a comma.
581  else if (Tok.is(AsmToken::RBrac)) {
582    // If there's anything other than the right brace, this is a post indexing
583    // addressing form.
584    E = Tok.getLoc();
585    Parser.Lex(); // Eat right bracket token.
586
587    int OffsetRegNum = 0;
588    bool OffsetRegShifted = false;
589    enum ShiftType ShiftType;
590    const MCExpr *ShiftAmount;
591    const MCExpr *Offset = 0;
592
593    const AsmToken &NextTok = Parser.getTok();
594    if (NextTok.isNot(AsmToken::EndOfStatement)) {
595      Postindexed = true;
596      Writeback = true;
597      if (NextTok.isNot(AsmToken::Comma)) {
598        Error(NextTok.getLoc(), "',' expected");
599        return 0;
600      }
601      Parser.Lex(); // Eat comma token.
602      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
603                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
604                               E))
605        return 0;
606    }
607
608    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
609                                 OffsetRegShifted, ShiftType, ShiftAmount,
610                                 Preindexed, Postindexed, Negative, Writeback,
611                                 S, E);
612  }
613
614  return 0;
615}
616
617/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
618/// we will parse the following (were +/- means that a plus or minus is
619/// optional):
620///   +/-Rm
621///   +/-Rm, shift
622///   #offset
623/// we return false on success or an error otherwise.
624bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
625                                        bool &OffsetRegShifted,
626                                        enum ShiftType &ShiftType,
627                                        const MCExpr *&ShiftAmount,
628                                        const MCExpr *&Offset,
629                                        bool &OffsetIsReg,
630                                        int &OffsetRegNum,
631                                        SMLoc &E) {
632  Negative = false;
633  OffsetRegShifted = false;
634  OffsetIsReg = false;
635  OffsetRegNum = -1;
636  const AsmToken &NextTok = Parser.getTok();
637  E = NextTok.getLoc();
638  if (NextTok.is(AsmToken::Plus))
639    Parser.Lex(); // Eat plus token.
640  else if (NextTok.is(AsmToken::Minus)) {
641    Negative = true;
642    Parser.Lex(); // Eat minus token
643  }
644  // See if there is a register following the "[Rn," or "[Rn]," we have so far.
645  const AsmToken &OffsetRegTok = Parser.getTok();
646  if (OffsetRegTok.is(AsmToken::Identifier)) {
647    SMLoc CurLoc = OffsetRegTok.getLoc();
648    OffsetRegNum = TryParseRegister();
649    if (OffsetRegNum != -1) {
650      OffsetIsReg = true;
651      E = CurLoc;
652    }
653  }
654
655  // If we parsed a register as the offset then there can be a shift after that.
656  if (OffsetRegNum != -1) {
657    // Look for a comma then a shift
658    const AsmToken &Tok = Parser.getTok();
659    if (Tok.is(AsmToken::Comma)) {
660      Parser.Lex(); // Eat comma token.
661
662      const AsmToken &Tok = Parser.getTok();
663      if (ParseShift(ShiftType, ShiftAmount, E))
664        return Error(Tok.getLoc(), "shift expected");
665      OffsetRegShifted = true;
666    }
667  }
668  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
669    // Look for #offset following the "[Rn," or "[Rn],"
670    const AsmToken &HashTok = Parser.getTok();
671    if (HashTok.isNot(AsmToken::Hash))
672      return Error(HashTok.getLoc(), "'#' expected");
673
674    Parser.Lex(); // Eat hash token.
675
676    if (getParser().ParseExpression(Offset))
677     return true;
678    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
679  }
680  return false;
681}
682
683/// ParseShift as one of these two:
684///   ( lsl | lsr | asr | ror ) , # shift_amount
685///   rrx
686/// and returns true if it parses a shift otherwise it returns false.
687bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
688                              SMLoc &E) {
689  const AsmToken &Tok = Parser.getTok();
690  if (Tok.isNot(AsmToken::Identifier))
691    return true;
692  StringRef ShiftName = Tok.getString();
693  if (ShiftName == "lsl" || ShiftName == "LSL")
694    St = Lsl;
695  else if (ShiftName == "lsr" || ShiftName == "LSR")
696    St = Lsr;
697  else if (ShiftName == "asr" || ShiftName == "ASR")
698    St = Asr;
699  else if (ShiftName == "ror" || ShiftName == "ROR")
700    St = Ror;
701  else if (ShiftName == "rrx" || ShiftName == "RRX")
702    St = Rrx;
703  else
704    return true;
705  Parser.Lex(); // Eat shift type token.
706
707  // Rrx stands alone.
708  if (St == Rrx)
709    return false;
710
711  // Otherwise, there must be a '#' and a shift amount.
712  const AsmToken &HashTok = Parser.getTok();
713  if (HashTok.isNot(AsmToken::Hash))
714    return Error(HashTok.getLoc(), "'#' expected");
715  Parser.Lex(); // Eat hash token.
716
717  if (getParser().ParseExpression(ShiftAmount))
718    return true;
719
720  return false;
721}
722
723/// Parse a arm instruction operand.  For now this parses the operand regardless
724/// of the mnemonic.
725ARMOperand *ARMAsmParser::ParseOperand() {
726  SMLoc S, E;
727
728  switch (getLexer().getKind()) {
729  case AsmToken::Identifier:
730    if (ARMOperand *Op = TryParseRegisterWithWriteBack())
731      return Op;
732
733    // This was not a register so parse other operands that start with an
734    // identifier (like labels) as expressions and create them as immediates.
735    const MCExpr *IdVal;
736    S = Parser.getTok().getLoc();
737    if (getParser().ParseExpression(IdVal))
738      return 0;
739    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
740    return ARMOperand::CreateImm(IdVal, S, E);
741  case AsmToken::LBrac:
742    return ParseMemory();
743  case AsmToken::LCurly:
744    return ParseRegisterList();
745  case AsmToken::Hash:
746    // #42 -> immediate.
747    // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
748    S = Parser.getTok().getLoc();
749    Parser.Lex();
750    const MCExpr *ImmVal;
751    if (getParser().ParseExpression(ImmVal))
752      return 0;
753    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
754    return ARMOperand::CreateImm(ImmVal, S, E);
755  default:
756    Error(Parser.getTok().getLoc(), "unexpected token in operand");
757    return 0;
758  }
759}
760
761/// Parse an arm instruction mnemonic followed by its operands.
762bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
763                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
764  // Create the leading tokens for the mnemonic, split by '.' characters.
765  size_t Start = 0, Next = Name.find('.');
766  StringRef Head = Name.slice(Start, Next);
767
768  // Determine the predicate, if any.
769  //
770  // FIXME: We need a way to check whether a prefix supports predication,
771  // otherwise we will end up with an ambiguity for instructions that happen to
772  // end with a predicate name.
773  // FIXME: Likewise, some arithmetic instructions have an 's' prefix which
774  // indicates to update the condition codes. Those instructions have an
775  // additional immediate operand which encodes the prefix as reg0 or CPSR.
776  // Just checking for a suffix of 's' definitely creates ambiguities; e.g,
777  // the SMMLS instruction.
778  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
779    .Case("eq", ARMCC::EQ)
780    .Case("ne", ARMCC::NE)
781    .Case("hs", ARMCC::HS)
782    .Case("lo", ARMCC::LO)
783    .Case("mi", ARMCC::MI)
784    .Case("pl", ARMCC::PL)
785    .Case("vs", ARMCC::VS)
786    .Case("vc", ARMCC::VC)
787    .Case("hi", ARMCC::HI)
788    .Case("ls", ARMCC::LS)
789    .Case("ge", ARMCC::GE)
790    .Case("lt", ARMCC::LT)
791    .Case("gt", ARMCC::GT)
792    .Case("le", ARMCC::LE)
793    .Case("al", ARMCC::AL)
794    .Default(~0U);
795
796  if (CC == ~0U ||
797      (CC == ARMCC::LS && (Head == "vmls" || Head == "vnmls"))) {
798    CC = ARMCC::AL;
799  } else {
800    Head = Head.slice(0, Head.size() - 2);
801  }
802
803  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
804  // FIXME: Should only add this operand for predicated instructions
805  Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc));
806
807  // Add the remaining tokens in the mnemonic.
808  while (Next != StringRef::npos) {
809    Start = Next;
810    Next = Name.find('.', Start + 1);
811    Head = Name.slice(Start, Next);
812
813    Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
814  }
815
816  // Read the remaining operands.
817  if (getLexer().isNot(AsmToken::EndOfStatement)) {
818    // Read the first operand.
819    if (ARMOperand *Op = ParseOperand())
820      Operands.push_back(Op);
821    else {
822      Parser.EatToEndOfStatement();
823      return true;
824    }
825
826    while (getLexer().is(AsmToken::Comma)) {
827      Parser.Lex();  // Eat the comma.
828
829      // Parse and remember the operand.
830      if (ARMOperand *Op = ParseOperand())
831        Operands.push_back(Op);
832      else {
833        Parser.EatToEndOfStatement();
834        return true;
835      }
836    }
837  }
838
839  if (getLexer().isNot(AsmToken::EndOfStatement)) {
840    Parser.EatToEndOfStatement();
841    return TokError("unexpected token in argument list");
842  }
843  Parser.Lex(); // Consume the EndOfStatement
844  return false;
845}
846
847bool ARMAsmParser::
848MatchAndEmitInstruction(SMLoc IDLoc,
849                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
850                        MCStreamer &Out) {
851  MCInst Inst;
852  unsigned ErrorInfo;
853  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
854  case Match_Success:
855    Out.EmitInstruction(Inst);
856    return false;
857
858  case Match_MissingFeature:
859    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
860    return true;
861  case Match_InvalidOperand: {
862    SMLoc ErrorLoc = IDLoc;
863    if (ErrorInfo != ~0U) {
864      if (ErrorInfo >= Operands.size())
865        return Error(IDLoc, "too few operands for instruction");
866
867      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
868      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
869    }
870
871    return Error(ErrorLoc, "invalid operand for instruction");
872  }
873  case Match_MnemonicFail:
874    return Error(IDLoc, "unrecognized instruction mnemonic");
875  }
876
877  llvm_unreachable("Implement any new match types added!");
878}
879
880
881
882/// ParseDirective parses the arm specific directives
883bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
884  StringRef IDVal = DirectiveID.getIdentifier();
885  if (IDVal == ".word")
886    return ParseDirectiveWord(4, DirectiveID.getLoc());
887  else if (IDVal == ".thumb")
888    return ParseDirectiveThumb(DirectiveID.getLoc());
889  else if (IDVal == ".thumb_func")
890    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
891  else if (IDVal == ".code")
892    return ParseDirectiveCode(DirectiveID.getLoc());
893  else if (IDVal == ".syntax")
894    return ParseDirectiveSyntax(DirectiveID.getLoc());
895  return true;
896}
897
898/// ParseDirectiveWord
899///  ::= .word [ expression (, expression)* ]
900bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
901  if (getLexer().isNot(AsmToken::EndOfStatement)) {
902    for (;;) {
903      const MCExpr *Value;
904      if (getParser().ParseExpression(Value))
905        return true;
906
907      getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
908
909      if (getLexer().is(AsmToken::EndOfStatement))
910        break;
911
912      // FIXME: Improve diagnostic.
913      if (getLexer().isNot(AsmToken::Comma))
914        return Error(L, "unexpected token in directive");
915      Parser.Lex();
916    }
917  }
918
919  Parser.Lex();
920  return false;
921}
922
923/// ParseDirectiveThumb
924///  ::= .thumb
925bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
926  if (getLexer().isNot(AsmToken::EndOfStatement))
927    return Error(L, "unexpected token in directive");
928  Parser.Lex();
929
930  // TODO: set thumb mode
931  // TODO: tell the MC streamer the mode
932  // getParser().getStreamer().Emit???();
933  return false;
934}
935
936/// ParseDirectiveThumbFunc
937///  ::= .thumbfunc symbol_name
938bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
939  const AsmToken &Tok = Parser.getTok();
940  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
941    return Error(L, "unexpected token in .thumb_func directive");
942  StringRef Name = Tok.getString();
943  Parser.Lex(); // Consume the identifier token.
944  if (getLexer().isNot(AsmToken::EndOfStatement))
945    return Error(L, "unexpected token in directive");
946  Parser.Lex();
947
948  // Mark symbol as a thumb symbol.
949  MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
950  getParser().getStreamer().EmitThumbFunc(Func);
951  return false;
952}
953
954/// ParseDirectiveSyntax
955///  ::= .syntax unified | divided
956bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
957  const AsmToken &Tok = Parser.getTok();
958  if (Tok.isNot(AsmToken::Identifier))
959    return Error(L, "unexpected token in .syntax directive");
960  StringRef Mode = Tok.getString();
961  if (Mode == "unified" || Mode == "UNIFIED")
962    Parser.Lex();
963  else if (Mode == "divided" || Mode == "DIVIDED")
964    Parser.Lex();
965  else
966    return Error(L, "unrecognized syntax mode in .syntax directive");
967
968  if (getLexer().isNot(AsmToken::EndOfStatement))
969    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
970  Parser.Lex();
971
972  // TODO tell the MC streamer the mode
973  // getParser().getStreamer().Emit???();
974  return false;
975}
976
977/// ParseDirectiveCode
978///  ::= .code 16 | 32
979bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
980  const AsmToken &Tok = Parser.getTok();
981  if (Tok.isNot(AsmToken::Integer))
982    return Error(L, "unexpected token in .code directive");
983  int64_t Val = Parser.getTok().getIntVal();
984  if (Val == 16)
985    Parser.Lex();
986  else if (Val == 32)
987    Parser.Lex();
988  else
989    return Error(L, "invalid operand to .code directive");
990
991  if (getLexer().isNot(AsmToken::EndOfStatement))
992    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
993  Parser.Lex();
994
995  if (Val == 16)
996    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
997  else
998    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
999
1000  return false;
1001}
1002
1003extern "C" void LLVMInitializeARMAsmLexer();
1004
1005/// Force static initialization.
1006extern "C" void LLVMInitializeARMAsmParser() {
1007  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
1008  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
1009  LLVMInitializeARMAsmLexer();
1010}
1011
1012#define GET_REGISTER_MATCHER
1013#define GET_MATCHER_IMPLEMENTATION
1014#include "ARMGenAsmMatcher.inc"
1015