ARMAsmParser.cpp revision 80eb233a3ce1a6f2e6c0847cb3e456d735e37569
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "ARMSubtarget.h"
12#include "llvm/MC/MCParser/MCAsmLexer.h"
13#include "llvm/MC/MCParser/MCAsmParser.h"
14#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
15#include "llvm/MC/MCStreamer.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/Target/TargetRegistry.h"
19#include "llvm/Target/TargetAsmParser.h"
20#include "llvm/Support/SourceMgr.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringSwitch.h"
24#include "llvm/ADT/Twine.h"
25using namespace llvm;
26
// The shift types for register controlled shifts in ARM memory addressing.
// ParseShift() selects an enumerator from the identifier of the same name,
// accepted either all-lowercase or all-uppercase.
enum ShiftType {
  Lsl, // "lsl" / "LSL"
  Lsr, // "lsr" / "LSR"
  Asr, // "asr" / "ASR"
  Ror, // "ror" / "ROR"
  Rrx  // "rrx" / "RRX"; the only form ParseShift() accepts without "#amount"
};
35
36namespace {
37  struct ARMOperand;
38
/// ARMAsmParser - Target-specific assembly parser for ARM.  It turns the
/// operands of one statement into ARMOperand objects, handles the ARM-specific
/// directives, and hands matched instructions to the MCStreamer.
class ARMAsmParser : public TargetAsmParser {
  // The generic assembly parser that owns the lexer and drives this object.
  MCAsmParser &Parser;
  // Stored by the constructor; not referenced by the hand-written code in this
  // file (the generated matcher included below may use it).
  TargetMachine &TM;

private:
  // Thin forwarding helpers onto the generic parser.
  MCAsmParser &getParser() const { return Parser; }

  MCAsmLexer &getLexer() const { return Parser.getLexer(); }

  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }

  // Returns whatever Parser.Error() returns so callers can "return Error(..)".
  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }

  // Operand sub-parsers.  Each returns a newly allocated operand, or null
  // when nothing could be parsed.
  ARMOperand *MaybeParseRegister(bool ParseWriteBack);
  ARMOperand *ParseRegisterList();
  ARMOperand *ParseMemory();

  // Parses the offset part of a memory operand; all results are returned
  // through the reference parameters.  Returns true on error.
  bool ParseMemoryOffsetReg(bool &Negative,
                            bool &OffsetRegShifted,
                            enum ShiftType &ShiftType,
                            const MCExpr *&ShiftAmount,
                            const MCExpr *&Offset,
                            bool &OffsetIsReg,
                            int &OffsetRegNum,
                            SMLoc &E);

  // Parses a shift specifier.  Returns true on error.
  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);

  // Parses a single operand of any supported kind.
  ARMOperand *ParseOperand();

  // Directive handlers.  Each returns true on error.
  bool ParseDirectiveWord(unsigned Size, SMLoc L);

  bool ParseDirectiveThumb(SMLoc L);

  bool ParseDirectiveThumbFunc(SMLoc L);

  bool ParseDirectiveCode(SMLoc L);

  bool ParseDirectiveSyntax(SMLoc L);

  // Matches the parsed operands against the instruction tables and, on
  // success, emits the resulting MCInst to Out.  Returns true on error.
  bool MatchAndEmitInstruction(SMLoc IDLoc,
                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                               MCStreamer &Out);

  /// @name Auto-generated Match Functions
  /// {

  // Pulls the generated matcher's member declarations into this class.
#define GET_ASSEMBLER_HEADER
#include "ARMGenAsmMatcher.inc"

  /// }


public:
  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}

  /// Parse the operands that follow mnemonic \p Name and append them (plus the
  /// mnemonic/condition-code tokens) to \p Operands.  Returns true on error.
  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  /// Handle an ARM-specific directive.  Returns true on error or when the
  /// directive is not one this parser recognizes.
  virtual bool ParseDirective(AsmToken DirectiveID);
};
101} // end anonymous namespace
102
103namespace {
104
/// ARMOperand - Instances of this class represent a parsed ARM machine
/// instruction operand.  An operand is one of the kinds in KindTy; the
/// matching member of the anonymous union holds its payload.  Instances are
/// created only through the static Create* factories (the kind-taking
/// constructor is private).
struct ARMOperand : public MCParsedAsmOperand {
public:
  // Discriminator for the union below.
  enum KindTy {
    CondCode,
    Immediate,
    Memory,
    Register,
    Token
  } Kind;

  // Source range covered by the operand.
  SMLoc StartLoc, EndLoc;

  union {
    // Kind == CondCode
    struct {
      ARMCC::CondCodes Val;
    } CC;

    // Kind == Token.  The characters are not copied: CreateToken() stores the
    // data pointer and size of the StringRef it is given.
    struct {
      const char *Data;
      unsigned Length;
    } Tok;

    // Kind == Register
    struct {
      unsigned RegNum;
      bool Writeback;
    } Reg;

    // Kind == Immediate
    struct {
      const MCExpr *Val;
    } Imm;

    // This is for all forms of ARM address expressions
    struct {
      unsigned BaseRegNum;
      unsigned OffsetRegNum; // used when OffsetIsReg is true
      const MCExpr *Offset; // used when OffsetIsReg is false
      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
      unsigned
        OffsetRegShifted : 1, // only used when OffsetIsReg is true
        Preindexed : 1,
        Postindexed : 1,
        OffsetIsReg : 1,
        Negative : 1, // only used when OffsetIsReg is true
        Writeback : 1;
    } Mem;

  };

  /// Copy constructor: copies the kind, the locations, and only the union
  /// member that is active for that kind.
  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
    Kind = o.Kind;
    StartLoc = o.StartLoc;
    EndLoc = o.EndLoc;
    switch (Kind) {
    case CondCode:
      CC = o.CC;
      break;
    case Token:
      Tok = o.Tok;
      break;
    case Register:
      Reg = o.Reg;
      break;
    case Immediate:
      Imm = o.Imm;
      break;
    case Memory:
      Mem = o.Mem;
      break;
    }
  }

  /// getStartLoc - Get the location of the first token of this operand.
  SMLoc getStartLoc() const { return StartLoc; }
  /// getEndLoc - Get the location of the last token of this operand.
  SMLoc getEndLoc() const { return EndLoc; }

  // Kind-checked accessors; each asserts that the operand has the right kind.
  ARMCC::CondCodes getCondCode() const {
    assert(Kind == CondCode && "Invalid access!");
    return CC.Val;
  }

  StringRef getToken() const {
    assert(Kind == Token && "Invalid access!");
    return StringRef(Tok.Data, Tok.Length);
  }

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return Reg.RegNum;
  }

  const MCExpr *getImm() const {
    assert(Kind == Immediate && "Invalid access!");
    return Imm.Val;
  }

  // Kind predicates.
  bool isCondCode() const { return Kind == CondCode; }
  bool isImm() const { return Kind == Immediate; }
  bool isReg() const { return Kind == Register; }
  bool isToken() const { return Kind == Token; }
  bool isMemory() const { return Kind == Memory; }

  /// Append \p Expr to \p Inst: as an immediate operand when it is null
  /// (treated as 0) or a constant expression, otherwise as an expression
  /// operand.
  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
    // Add as immediates when possible.  Null MCExpr = 0.
    if (Expr == 0)
      Inst.addOperand(MCOperand::CreateImm(0));
    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
    else
      Inst.addOperand(MCOperand::CreateExpr(Expr));
  }

  /// Append the two operands of a predicate: the condition code as an
  /// immediate, followed by register 0.
  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
    assert(N == 2 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
    // FIXME: What belongs here?
    Inst.addOperand(MCOperand::CreateReg(0));
  }

  /// Append this register as a single register operand.
  void addRegOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(getReg()));
  }

  /// Append this immediate as a single operand (see addExpr()).
  void addImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    addExpr(Inst, getImm());
  }


  /// Return true if this is a memory operand usable as a "mode 5" address:
  /// a base register with either no offset or a constant offset, and none of
  /// the register-offset, shift, writeback or negative flags set.
  bool isMemMode5() const {
    if (!isMemory() || Mem.OffsetIsReg || Mem.OffsetRegShifted ||
        Mem.Writeback || Mem.Negative)
      return false;
    // If there is an offset expression, make sure it's valid.
    if (!Mem.Offset)
      return true;
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
    if (!CE)
      return false;
    // The offset must be a multiple of 4 in the range -1020 to 1020.
    int64_t Value = CE->getValue();
    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
  }

  /// Append the two operands of a mode 5 address: the base register and the
  /// constant offset divided by 4 (0 when there is no offset).
  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
    assert(N == 2 && isMemMode5() && "Invalid number of operands!");

    Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
    assert(!Mem.OffsetIsReg && "invalid mode 5 operand");
    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
    // the difference?
    if (Mem.Offset) {
      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.Offset);
      assert (CE && "non-constant mode 5 offset operand!");
      // The MCInst offset operand doesn't include the low two bits (like
      // the instruction encoding).
      Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
    } else
      Inst.addOperand(MCOperand::CreateImm(0));
  }

  /// Print a short description of the operand to \p OS.
  virtual void dump(raw_ostream &OS) const;

  // Factories.  Each allocates the operand with new and returns the raw
  // pointer.

  /// Create a condition-code operand; start and end location are both \p S.
  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
    ARMOperand *Op = new ARMOperand(CondCode);
    Op->CC.Val = CC;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }

  /// Create a token operand referring to (not copying) the characters of
  /// \p Str; start and end location are both \p S.
  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
    ARMOperand *Op = new ARMOperand(Token);
    Op->Tok.Data = Str.data();
    Op->Tok.Length = Str.size();
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }

  /// Create a register operand covering the range [\p S, \p E].
  static ARMOperand *CreateReg(unsigned RegNum, bool Writeback, SMLoc S,
                               SMLoc E) {
    ARMOperand *Op = new ARMOperand(Register);
    Op->Reg.RegNum = RegNum;
    Op->Reg.Writeback = Writeback;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  /// Create an immediate operand covering the range [\p S, \p E].
  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
    ARMOperand *Op = new ARMOperand(Immediate);
    Op->Imm.Val = Val;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  /// Create a memory operand covering the range [\p S, \p E].  Every field of
  /// Mem is stored as given, whether or not it is meaningful for the
  /// addressing form being described.
  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
                               const MCExpr *Offset, unsigned OffsetRegNum,
                               bool OffsetRegShifted, enum ShiftType ShiftType,
                               const MCExpr *ShiftAmount, bool Preindexed,
                               bool Postindexed, bool Negative, bool Writeback,
                               SMLoc S, SMLoc E) {
    ARMOperand *Op = new ARMOperand(Memory);
    Op->Mem.BaseRegNum = BaseRegNum;
    Op->Mem.OffsetIsReg = OffsetIsReg;
    Op->Mem.Offset = Offset;
    Op->Mem.OffsetRegNum = OffsetRegNum;
    Op->Mem.OffsetRegShifted = OffsetRegShifted;
    Op->Mem.ShiftType = ShiftType;
    Op->Mem.ShiftAmount = ShiftAmount;
    Op->Mem.Preindexed = Preindexed;
    Op->Mem.Postindexed = Postindexed;
    Op->Mem.Negative = Negative;
    Op->Mem.Writeback = Writeback;

    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

private:
  // Only sets the kind; the payload and locations are filled in by the
  // Create* factories.
  ARMOperand(KindTy K) : Kind(K) {}
};
334
335} // end anonymous namespace.
336
337void ARMOperand::dump(raw_ostream &OS) const {
338  switch (Kind) {
339  case CondCode:
340    OS << ARMCondCodeToString(getCondCode());
341    break;
342  case Immediate:
343    getImm()->print(OS);
344    break;
345  case Memory:
346    OS << "<memory>";
347    break;
348  case Register:
349    OS << "<register " << getReg() << ">";
350    break;
351  case Token:
352    OS << "'" << getToken() << "'";
353    break;
354  }
355}
356
357/// @name Auto-generated Match Functions
358/// {
359
360static unsigned MatchRegisterName(StringRef Name);
361
362/// }
363
364/// Try to parse a register name.  The token must be an Identifier when called,
365/// and if it is a register name the token is eaten and a Reg operand is created
366/// and returned.  Otherwise return null.
367///
368/// TODO this is likely to change to allow different register types and or to
369/// parse for a specific register type.
370ARMOperand *ARMAsmParser::MaybeParseRegister(bool ParseWriteBack) {
371  SMLoc S, E;
372  const AsmToken &Tok = Parser.getTok();
373  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
374
375  // FIXME: Validate register for the current architecture; we have to do
376  // validation later, so maybe there is no need for this here.
377  int RegNum;
378
379  RegNum = MatchRegisterName(Tok.getString());
380  if (RegNum == -1)
381    return 0;
382
383  S = Tok.getLoc();
384
385  Parser.Lex(); // Eat identifier token.
386
387  E = Parser.getTok().getLoc();
388
389  bool Writeback = false;
390  if (ParseWriteBack) {
391    const AsmToken &ExclaimTok = Parser.getTok();
392    if (ExclaimTok.is(AsmToken::Exclaim)) {
393      E = ExclaimTok.getLoc();
394      Writeback = true;
395      Parser.Lex(); // Eat exclaim token
396    }
397  }
398
399  return ARMOperand::CreateReg(RegNum, Writeback, S, E);
400}
401
402/// Parse a register list, return it if successful else return null.  The first
403/// token must be a '{' when called.
404ARMOperand *ARMAsmParser::ParseRegisterList() {
405  SMLoc S, E;
406  assert(Parser.getTok().is(AsmToken::LCurly) &&
407         "Token is not an Left Curly Brace");
408  S = Parser.getTok().getLoc();
409  Parser.Lex(); // Eat left curly brace token.
410
411  const AsmToken &RegTok = Parser.getTok();
412  SMLoc RegLoc = RegTok.getLoc();
413  if (RegTok.isNot(AsmToken::Identifier)) {
414    Error(RegLoc, "register expected");
415    return 0;
416  }
417  int RegNum = MatchRegisterName(RegTok.getString());
418  if (RegNum == -1) {
419    Error(RegLoc, "register expected");
420    return 0;
421  }
422
423  Parser.Lex(); // Eat identifier token.
424  unsigned RegList = 1 << RegNum;
425
426  int HighRegNum = RegNum;
427  // TODO ranges like "{Rn-Rm}"
428  while (Parser.getTok().is(AsmToken::Comma)) {
429    Parser.Lex(); // Eat comma token.
430
431    const AsmToken &RegTok = Parser.getTok();
432    SMLoc RegLoc = RegTok.getLoc();
433    if (RegTok.isNot(AsmToken::Identifier)) {
434      Error(RegLoc, "register expected");
435      return 0;
436    }
437    int RegNum = MatchRegisterName(RegTok.getString());
438    if (RegNum == -1) {
439      Error(RegLoc, "register expected");
440      return 0;
441    }
442
443    if (RegList & (1 << RegNum))
444      Warning(RegLoc, "register duplicated in register list");
445    else if (RegNum <= HighRegNum)
446      Warning(RegLoc, "register not in ascending order in register list");
447    RegList |= 1 << RegNum;
448    HighRegNum = RegNum;
449
450    Parser.Lex(); // Eat identifier token.
451  }
452  const AsmToken &RCurlyTok = Parser.getTok();
453  if (RCurlyTok.isNot(AsmToken::RCurly)) {
454    Error(RCurlyTok.getLoc(), "'}' expected");
455    return 0;
456  }
457  E = RCurlyTok.getLoc();
458  Parser.Lex(); // Eat left curly brace token.
459
460  // FIXME: Need to return an operand!
461  Error(E, "FIXME: register list parsing not implemented");
462  return 0;
463}
464
465/// Parse an arm memory expression, return false if successful else return true
466/// or an error.  The first token must be a '[' when called.
467/// TODO Only preindexing and postindexing addressing are started, unindexed
468/// with option, etc are still to do.
469ARMOperand *ARMAsmParser::ParseMemory() {
470  SMLoc S, E;
471  assert(Parser.getTok().is(AsmToken::LBrac) &&
472         "Token is not an Left Bracket");
473  S = Parser.getTok().getLoc();
474  Parser.Lex(); // Eat left bracket token.
475
476  const AsmToken &BaseRegTok = Parser.getTok();
477  if (BaseRegTok.isNot(AsmToken::Identifier)) {
478    Error(BaseRegTok.getLoc(), "register expected");
479    return 0;
480  }
481  int BaseRegNum = 0;
482  if (ARMOperand *Op = MaybeParseRegister(false)) {
483    BaseRegNum = Op->getReg();
484    delete Op;
485  } else {
486    Error(BaseRegTok.getLoc(), "register expected");
487    return 0;
488  }
489
490  bool Preindexed = false;
491  bool Postindexed = false;
492  bool OffsetIsReg = false;
493  bool Negative = false;
494  bool Writeback = false;
495
496  // First look for preindexed address forms, that is after the "[Rn" we now
497  // have to see if the next token is a comma.
498  const AsmToken &Tok = Parser.getTok();
499  if (Tok.is(AsmToken::Comma)) {
500    Preindexed = true;
501    Parser.Lex(); // Eat comma token.
502    int OffsetRegNum;
503    bool OffsetRegShifted;
504    enum ShiftType ShiftType;
505    const MCExpr *ShiftAmount;
506    const MCExpr *Offset;
507    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
508                             Offset, OffsetIsReg, OffsetRegNum, E))
509      return 0;
510    const AsmToken &RBracTok = Parser.getTok();
511    if (RBracTok.isNot(AsmToken::RBrac)) {
512      Error(RBracTok.getLoc(), "']' expected");
513      return 0;
514    }
515    E = RBracTok.getLoc();
516    Parser.Lex(); // Eat right bracket token.
517
518    const AsmToken &ExclaimTok = Parser.getTok();
519    if (ExclaimTok.is(AsmToken::Exclaim)) {
520      E = ExclaimTok.getLoc();
521      Writeback = true;
522      Parser.Lex(); // Eat exclaim token
523    }
524    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
525                                 OffsetRegShifted, ShiftType, ShiftAmount,
526                                 Preindexed, Postindexed, Negative, Writeback,
527                                 S, E);
528  }
529  // The "[Rn" we have so far was not followed by a comma.
530  else if (Tok.is(AsmToken::RBrac)) {
531    // If there's anything other than the right brace, this is a post indexing
532    // addressing form.
533    E = Tok.getLoc();
534    Parser.Lex(); // Eat right bracket token.
535
536    int OffsetRegNum = 0;
537    bool OffsetRegShifted = false;
538    enum ShiftType ShiftType;
539    const MCExpr *ShiftAmount;
540    const MCExpr *Offset = 0;
541
542    const AsmToken &NextTok = Parser.getTok();
543    if (NextTok.isNot(AsmToken::EndOfStatement)) {
544      Postindexed = true;
545      Writeback = true;
546      if (NextTok.isNot(AsmToken::Comma)) {
547        Error(NextTok.getLoc(), "',' expected");
548        return 0;
549      }
550      Parser.Lex(); // Eat comma token.
551      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
552                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
553                               E))
554        return 0;
555    }
556
557    return ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
558                                 OffsetRegShifted, ShiftType, ShiftAmount,
559                                 Preindexed, Postindexed, Negative, Writeback,
560                                 S, E);
561  }
562
563  return 0;
564}
565
566/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
567/// we will parse the following (were +/- means that a plus or minus is
568/// optional):
569///   +/-Rm
570///   +/-Rm, shift
571///   #offset
572/// we return false on success or an error otherwise.
573bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
574                                        bool &OffsetRegShifted,
575                                        enum ShiftType &ShiftType,
576                                        const MCExpr *&ShiftAmount,
577                                        const MCExpr *&Offset,
578                                        bool &OffsetIsReg,
579                                        int &OffsetRegNum,
580                                        SMLoc &E) {
581  Negative = false;
582  OffsetRegShifted = false;
583  OffsetIsReg = false;
584  OffsetRegNum = -1;
585  const AsmToken &NextTok = Parser.getTok();
586  E = NextTok.getLoc();
587  if (NextTok.is(AsmToken::Plus))
588    Parser.Lex(); // Eat plus token.
589  else if (NextTok.is(AsmToken::Minus)) {
590    Negative = true;
591    Parser.Lex(); // Eat minus token
592  }
593  // See if there is a register following the "[Rn," or "[Rn]," we have so far.
594  const AsmToken &OffsetRegTok = Parser.getTok();
595  if (OffsetRegTok.is(AsmToken::Identifier)) {
596    if (ARMOperand *Op = MaybeParseRegister(false)) {
597      OffsetIsReg = true;
598      E = Op->getEndLoc();
599      OffsetRegNum = Op->getReg();
600      delete Op;
601    }
602  }
603  // If we parsed a register as the offset then their can be a shift after that
604  if (OffsetRegNum != -1) {
605    // Look for a comma then a shift
606    const AsmToken &Tok = Parser.getTok();
607    if (Tok.is(AsmToken::Comma)) {
608      Parser.Lex(); // Eat comma token.
609
610      const AsmToken &Tok = Parser.getTok();
611      if (ParseShift(ShiftType, ShiftAmount, E))
612        return Error(Tok.getLoc(), "shift expected");
613      OffsetRegShifted = true;
614    }
615  }
616  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
617    // Look for #offset following the "[Rn," or "[Rn],"
618    const AsmToken &HashTok = Parser.getTok();
619    if (HashTok.isNot(AsmToken::Hash))
620      return Error(HashTok.getLoc(), "'#' expected");
621
622    Parser.Lex(); // Eat hash token.
623
624    if (getParser().ParseExpression(Offset))
625     return true;
626    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
627  }
628  return false;
629}
630
631/// ParseShift as one of these two:
632///   ( lsl | lsr | asr | ror ) , # shift_amount
633///   rrx
634/// and returns true if it parses a shift otherwise it returns false.
635bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
636                              SMLoc &E) {
637  const AsmToken &Tok = Parser.getTok();
638  if (Tok.isNot(AsmToken::Identifier))
639    return true;
640  StringRef ShiftName = Tok.getString();
641  if (ShiftName == "lsl" || ShiftName == "LSL")
642    St = Lsl;
643  else if (ShiftName == "lsr" || ShiftName == "LSR")
644    St = Lsr;
645  else if (ShiftName == "asr" || ShiftName == "ASR")
646    St = Asr;
647  else if (ShiftName == "ror" || ShiftName == "ROR")
648    St = Ror;
649  else if (ShiftName == "rrx" || ShiftName == "RRX")
650    St = Rrx;
651  else
652    return true;
653  Parser.Lex(); // Eat shift type token.
654
655  // Rrx stands alone.
656  if (St == Rrx)
657    return false;
658
659  // Otherwise, there must be a '#' and a shift amount.
660  const AsmToken &HashTok = Parser.getTok();
661  if (HashTok.isNot(AsmToken::Hash))
662    return Error(HashTok.getLoc(), "'#' expected");
663  Parser.Lex(); // Eat hash token.
664
665  if (getParser().ParseExpression(ShiftAmount))
666    return true;
667
668  return false;
669}
670
671/// Parse a arm instruction operand.  For now this parses the operand regardless
672/// of the mnemonic.
673ARMOperand *ARMAsmParser::ParseOperand() {
674  SMLoc S, E;
675
676  switch (getLexer().getKind()) {
677  case AsmToken::Identifier:
678    if (ARMOperand *Op = MaybeParseRegister(true))
679      return Op;
680
681    // This was not a register so parse other operands that start with an
682    // identifier (like labels) as expressions and create them as immediates.
683    const MCExpr *IdVal;
684    S = Parser.getTok().getLoc();
685    if (getParser().ParseExpression(IdVal))
686      return 0;
687    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
688    return ARMOperand::CreateImm(IdVal, S, E);
689  case AsmToken::LBrac:
690    return ParseMemory();
691  case AsmToken::LCurly:
692    return ParseRegisterList();
693  case AsmToken::Hash:
694    // #42 -> immediate.
695    // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
696    S = Parser.getTok().getLoc();
697    Parser.Lex();
698    const MCExpr *ImmVal;
699    if (getParser().ParseExpression(ImmVal))
700      return 0;
701    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
702    return ARMOperand::CreateImm(ImmVal, S, E);
703  default:
704    Error(Parser.getTok().getLoc(), "unexpected token in operand");
705    return 0;
706  }
707}
708
709/// Parse an arm instruction mnemonic followed by its operands.
710bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
711                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
712  // Create the leading tokens for the mnemonic, split by '.' characters.
713  size_t Start = 0, Next = Name.find('.');
714  StringRef Head = Name.slice(Start, Next);
715
716  // Determine the predicate, if any.
717  //
718  // FIXME: We need a way to check whether a prefix supports predication,
719  // otherwise we will end up with an ambiguity for instructions that happen to
720  // end with a predicate name.
721  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
722    .Case("eq", ARMCC::EQ)
723    .Case("ne", ARMCC::NE)
724    .Case("hs", ARMCC::HS)
725    .Case("lo", ARMCC::LO)
726    .Case("mi", ARMCC::MI)
727    .Case("pl", ARMCC::PL)
728    .Case("vs", ARMCC::VS)
729    .Case("vc", ARMCC::VC)
730    .Case("hi", ARMCC::HI)
731    .Case("ls", ARMCC::LS)
732    .Case("ge", ARMCC::GE)
733    .Case("lt", ARMCC::LT)
734    .Case("gt", ARMCC::GT)
735    .Case("le", ARMCC::LE)
736    .Case("al", ARMCC::AL)
737    .Default(~0U);
738
739  if (CC != ~0U)
740    Head = Head.slice(0, Head.size() - 2);
741  else
742    CC = ARMCC::AL;
743
744  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
745  Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), NameLoc));
746
747  // Add the remaining tokens in the mnemonic.
748  while (Next != StringRef::npos) {
749    Start = Next;
750    Next = Name.find('.', Start + 1);
751    Head = Name.slice(Start, Next);
752
753    Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
754  }
755
756  // Read the remaining operands.
757  if (getLexer().isNot(AsmToken::EndOfStatement)) {
758    // Read the first operand.
759    if (ARMOperand *Op = ParseOperand())
760      Operands.push_back(Op);
761    else {
762      Parser.EatToEndOfStatement();
763      return true;
764    }
765
766    while (getLexer().is(AsmToken::Comma)) {
767      Parser.Lex();  // Eat the comma.
768
769      // Parse and remember the operand.
770      if (ARMOperand *Op = ParseOperand())
771        Operands.push_back(Op);
772      else {
773        Parser.EatToEndOfStatement();
774        return true;
775      }
776    }
777  }
778
779  if (getLexer().isNot(AsmToken::EndOfStatement)) {
780    Parser.EatToEndOfStatement();
781    return TokError("unexpected token in argument list");
782  }
783  Parser.Lex(); // Consume the EndOfStatement
784  return false;
785}
786
787bool ARMAsmParser::
788MatchAndEmitInstruction(SMLoc IDLoc,
789                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
790                        MCStreamer &Out) {
791  MCInst Inst;
792  unsigned ErrorInfo;
793  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
794  case Match_Success:
795    Out.EmitInstruction(Inst);
796    return false;
797
798  case Match_MissingFeature:
799    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
800    return true;
801  case Match_InvalidOperand: {
802    SMLoc ErrorLoc = IDLoc;
803    if (ErrorInfo != ~0U) {
804      if (ErrorInfo >= Operands.size())
805        return Error(IDLoc, "too few operands for instruction");
806
807      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
808      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
809    }
810
811    return Error(ErrorLoc, "invalid operand for instruction");
812  }
813  case Match_MnemonicFail:
814    return Error(IDLoc, "unrecognized instruction mnemonic");
815  }
816
817  llvm_unreachable("Implement any new match types added!");
818}
819
820
821
822/// ParseDirective parses the arm specific directives
823bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
824  StringRef IDVal = DirectiveID.getIdentifier();
825  if (IDVal == ".word")
826    return ParseDirectiveWord(4, DirectiveID.getLoc());
827  else if (IDVal == ".thumb")
828    return ParseDirectiveThumb(DirectiveID.getLoc());
829  else if (IDVal == ".thumb_func")
830    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
831  else if (IDVal == ".code")
832    return ParseDirectiveCode(DirectiveID.getLoc());
833  else if (IDVal == ".syntax")
834    return ParseDirectiveSyntax(DirectiveID.getLoc());
835  return true;
836}
837
838/// ParseDirectiveWord
839///  ::= .word [ expression (, expression)* ]
840bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
841  if (getLexer().isNot(AsmToken::EndOfStatement)) {
842    for (;;) {
843      const MCExpr *Value;
844      if (getParser().ParseExpression(Value))
845        return true;
846
847      getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
848
849      if (getLexer().is(AsmToken::EndOfStatement))
850        break;
851
852      // FIXME: Improve diagnostic.
853      if (getLexer().isNot(AsmToken::Comma))
854        return Error(L, "unexpected token in directive");
855      Parser.Lex();
856    }
857  }
858
859  Parser.Lex();
860  return false;
861}
862
863/// ParseDirectiveThumb
864///  ::= .thumb
865bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
866  if (getLexer().isNot(AsmToken::EndOfStatement))
867    return Error(L, "unexpected token in directive");
868  Parser.Lex();
869
870  // TODO: set thumb mode
871  // TODO: tell the MC streamer the mode
872  // getParser().getStreamer().Emit???();
873  return false;
874}
875
876/// ParseDirectiveThumbFunc
877///  ::= .thumbfunc symbol_name
878bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
879  const AsmToken &Tok = Parser.getTok();
880  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
881    return Error(L, "unexpected token in .syntax directive");
882  Parser.Lex(); // Consume the identifier token.
883
884  if (getLexer().isNot(AsmToken::EndOfStatement))
885    return Error(L, "unexpected token in directive");
886  Parser.Lex();
887
888  // TODO: mark symbol as a thumb symbol
889  // getParser().getStreamer().Emit???();
890  return false;
891}
892
893/// ParseDirectiveSyntax
894///  ::= .syntax unified | divided
895bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
896  const AsmToken &Tok = Parser.getTok();
897  if (Tok.isNot(AsmToken::Identifier))
898    return Error(L, "unexpected token in .syntax directive");
899  StringRef Mode = Tok.getString();
900  if (Mode == "unified" || Mode == "UNIFIED")
901    Parser.Lex();
902  else if (Mode == "divided" || Mode == "DIVIDED")
903    Parser.Lex();
904  else
905    return Error(L, "unrecognized syntax mode in .syntax directive");
906
907  if (getLexer().isNot(AsmToken::EndOfStatement))
908    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
909  Parser.Lex();
910
911  // TODO tell the MC streamer the mode
912  // getParser().getStreamer().Emit???();
913  return false;
914}
915
916/// ParseDirectiveCode
917///  ::= .code 16 | 32
918bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
919  const AsmToken &Tok = Parser.getTok();
920  if (Tok.isNot(AsmToken::Integer))
921    return Error(L, "unexpected token in .code directive");
922  int64_t Val = Parser.getTok().getIntVal();
923  if (Val == 16)
924    Parser.Lex();
925  else if (Val == 32)
926    Parser.Lex();
927  else
928    return Error(L, "invalid operand to .code directive");
929
930  if (getLexer().isNot(AsmToken::EndOfStatement))
931    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
932  Parser.Lex();
933
934  // TODO tell the MC streamer the mode
935  // getParser().getStreamer().Emit???();
936  return false;
937}
938
// Defined outside this file; called by LLVMInitializeARMAsmParser() below.
extern "C" void LLVMInitializeARMAsmLexer();

/// Force static initialization.
/// Registers ARMAsmParser as the assembly parser for both the ARM and the
/// Thumb target, then initializes the ARM assembly lexer.
extern "C" void LLVMInitializeARMAsmParser() {
  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
  LLVMInitializeARMAsmLexer();
}
947
948#define GET_REGISTER_MATCHER
949#define GET_MATCHER_IMPLEMENTATION
950#include "ARMGenAsmMatcher.inc"
951