ARMAsmParser.cpp revision d7894f105a3c397a3d7f5c5136eee39f5865e64b
1//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARM.h"
11#include "llvm/ADT/SmallVector.h"
12#include "llvm/ADT/Twine.h"
13#include "llvm/MC/MCAsmLexer.h"
14#include "llvm/MC/MCAsmParser.h"
15#include "llvm/MC/MCStreamer.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCInst.h"
18#include "llvm/Support/SourceMgr.h"
19#include "llvm/Target/TargetRegistry.h"
20#include "llvm/Target/TargetAsmParser.h"
21using namespace llvm;
22
23namespace {
24struct ARMOperand;
25
/// ShiftType - The shift operations that can be applied to the offset register
/// of an ARM memory operand.  ParseShift selects an enumerator from the shift
/// mnemonic it reads (all-lowercase or all-uppercase spelling).
enum ShiftType {
  Lsl, // "lsl" / "LSL"
  Lsr, // "lsr" / "LSR"
  Asr, // "asr" / "ASR"
  Ror, // "ror" / "ROR"
  Rrx  // "rrx" / "RRX"; the only form parsed without a '#' shift amount
};
34
35class ARMAsmParser : public TargetAsmParser {
36  MCAsmParser &Parser;
37
38private:
39  MCAsmParser &getParser() const { return Parser; }
40
41  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
42
43  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
44
45  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
46
47  bool ParseRegister(ARMOperand &Op);
48
49  bool ParseRegisterList(ARMOperand &Op);
50
51  bool ParseMemory(ARMOperand &Op);
52
53  bool ParseShift(enum ShiftType *St, const MCExpr *ShiftAmount);
54
55  bool ParseOperand(ARMOperand &Op);
56
57  bool ParseDirectiveWord(unsigned Size, SMLoc L);
58
59  // TODO - For now hacked versions of the next two are in here in this file to
60  // allow some parser testing until the table gen versions are implemented.
61
62  /// @name Auto-generated Match Functions
63  /// {
64  bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
65                        MCInst &Inst);
66
67  /// MatchRegisterName - Match the given string to a register name and return
68  /// its register number, or -1 if there is no match.  To allow return values
69  /// to be used directly in register lists, arm registers have values between
70  /// 0 and 15.
71  int MatchRegisterName(const StringRef &Name);
72
73  /// }
74
75
76public:
77  ARMAsmParser(const Target &T, MCAsmParser &_Parser)
78    : TargetAsmParser(T), Parser(_Parser) {}
79
80  virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
81
82  virtual bool ParseDirective(AsmToken DirectiveID);
83};
84
85} // end anonymous namespace
86
87namespace {
88
89/// ARMOperand - Instances of this class represent a parsed ARM machine
90/// instruction.
91struct ARMOperand {
92  enum {
93    Token,
94    Register,
95    Memory
96  } Kind;
97
98
99  union {
100    struct {
101      const char *Data;
102      unsigned Length;
103    } Tok;
104
105    struct {
106      unsigned RegNum;
107      bool Writeback;
108    } Reg;
109
110    // This is for all forms of ARM address expressions
111    struct {
112      unsigned BaseRegNum;
113      bool OffsetIsReg;
114      const MCExpr *Offset; // used when OffsetIsReg is false
115      unsigned OffsetRegNum; // used when OffsetIsReg is true
116      bool OffsetRegShifted; // only used when OffsetIsReg is true
117      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
118      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
119      bool Preindexed;
120      bool Postindexed;
121      bool Negative; // only used when OffsetIsReg is true
122      bool Writeback;
123    } Mem;
124
125  };
126
127  StringRef getToken() const {
128    assert(Kind == Token && "Invalid access!");
129    return StringRef(Tok.Data, Tok.Length);
130  }
131
132  unsigned getReg() const {
133    assert(Kind == Register && "Invalid access!");
134    return Reg.RegNum;
135  }
136
137  bool isToken() const {return Kind == Token; }
138
139  bool isReg() const { return Kind == Register; }
140
141  void addRegOperands(MCInst &Inst, unsigned N) const {
142    assert(N == 1 && "Invalid number of operands!");
143    Inst.addOperand(MCOperand::CreateReg(getReg()));
144  }
145
146  static ARMOperand CreateToken(StringRef Str) {
147    ARMOperand Res;
148    Res.Kind = Token;
149    Res.Tok.Data = Str.data();
150    Res.Tok.Length = Str.size();
151    return Res;
152  }
153
154  static ARMOperand CreateReg(unsigned RegNum, bool Writeback) {
155    ARMOperand Res;
156    Res.Kind = Register;
157    Res.Reg.RegNum = RegNum;
158    Res.Reg.Writeback = Writeback;
159    return Res;
160  }
161
162  static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
163                              const MCExpr *Offset, unsigned OffsetRegNum,
164                              bool OffsetRegShifted, enum ShiftType ShiftType,
165                              const MCExpr *ShiftAmount, bool Preindexed,
166                              bool Postindexed, bool Negative, bool Writeback) {
167    ARMOperand Res;
168    Res.Kind = Memory;
169    Res.Mem.BaseRegNum = BaseRegNum;
170    Res.Mem.OffsetIsReg = OffsetIsReg;
171    Res.Mem.Offset = Offset;
172    Res.Mem.OffsetRegNum = OffsetRegNum;
173    Res.Mem.OffsetRegShifted = OffsetRegShifted;
174    Res.Mem.ShiftType = ShiftType;
175    Res.Mem.ShiftAmount = ShiftAmount;
176    Res.Mem.Preindexed = Preindexed;
177    Res.Mem.Postindexed = Postindexed;
178    Res.Mem.Negative = Negative;
179    Res.Mem.Writeback = Writeback;
180    return Res;
181  }
182};
183
184} // end anonymous namespace.
185
186// Try to parse a register name.  The token must be an Identifier when called,
187// and if it is a register name a Reg operand is created, the token is eaten
188// and false is returned.  Else true is returned and no token is eaten.
189// TODO this is likely to change to allow different register types and or to
190// parse for a specific register type.
191bool ARMAsmParser::ParseRegister(ARMOperand &Op) {
192  const AsmToken &Tok = getLexer().getTok();
193  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
194
195  // FIXME: Validate register for the current architecture; we have to do
196  // validation later, so maybe there is no need for this here.
197  int RegNum;
198
199  RegNum = MatchRegisterName(Tok.getString());
200  if (RegNum == -1)
201    return true;
202  getLexer().Lex(); // Eat identifier token.
203
204  bool Writeback = false;
205  const AsmToken &ExclaimTok = getLexer().getTok();
206  if (ExclaimTok.is(AsmToken::Exclaim)) {
207    Writeback = true;
208    getLexer().Lex(); // Eat exclaim token
209  }
210
211  Op = ARMOperand::CreateReg(RegNum, Writeback);
212
213  return false;
214}
215
216// Try to parse a register list.  The first token must be a '{' when called
217// for now.
218bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {
219  const AsmToken &LCurlyTok = getLexer().getTok();
220  assert(LCurlyTok.is(AsmToken::LCurly) && "Token is not an Left Curly Brace");
221  getLexer().Lex(); // Eat left curly brace token.
222
223  const AsmToken &RegTok = getLexer().getTok();
224  SMLoc RegLoc = RegTok.getLoc();
225  if (RegTok.isNot(AsmToken::Identifier))
226    return Error(RegLoc, "register expected");
227  int RegNum = MatchRegisterName(RegTok.getString());
228  if (RegNum == -1)
229    return Error(RegLoc, "register expected");
230  getLexer().Lex(); // Eat identifier token.
231  unsigned RegList = 1 << RegNum;
232
233  int HighRegNum = RegNum;
234  // TODO ranges like "{Rn-Rm}"
235  while (getLexer().getTok().is(AsmToken::Comma)) {
236    getLexer().Lex(); // Eat comma token.
237
238    const AsmToken &RegTok = getLexer().getTok();
239    SMLoc RegLoc = RegTok.getLoc();
240    if (RegTok.isNot(AsmToken::Identifier))
241      return Error(RegLoc, "register expected");
242    int RegNum = MatchRegisterName(RegTok.getString());
243    if (RegNum == -1)
244      return Error(RegLoc, "register expected");
245
246    if (RegList & (1 << RegNum))
247      Warning(RegLoc, "register duplicated in register list");
248    else if (RegNum <= HighRegNum)
249      Warning(RegLoc, "register not in ascending order in register list");
250    RegList |= 1 << RegNum;
251    HighRegNum = RegNum;
252
253    getLexer().Lex(); // Eat identifier token.
254  }
255  const AsmToken &RCurlyTok = getLexer().getTok();
256  if (RCurlyTok.isNot(AsmToken::RCurly))
257    return Error(RCurlyTok.getLoc(), "'}' expected");
258  getLexer().Lex(); // Eat left curly brace token.
259
260  return false;
261}
262
263// Try to parse an arm memory expression.  It must start with a '[' token.
264// TODO Only preindexing and postindexing addressing are started, unindexed
265// with option, etc are still to do.
266bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
267  const AsmToken &LBracTok = getLexer().getTok();
268  assert(LBracTok.is(AsmToken::LBrac) && "Token is not an Left Bracket");
269  getLexer().Lex(); // Eat left bracket token.
270
271  const AsmToken &BaseRegTok = getLexer().getTok();
272  if (BaseRegTok.isNot(AsmToken::Identifier))
273    return Error(BaseRegTok.getLoc(), "register expected");
274  int BaseRegNum = MatchRegisterName(BaseRegTok.getString());
275  if (BaseRegNum == -1)
276    return Error(BaseRegTok.getLoc(), "register expected");
277  getLexer().Lex(); // Eat identifier token.
278
279  bool Preindexed = false;
280  bool Postindexed = false;
281  bool OffsetIsReg = false;
282  bool Negative = false;
283  bool Writeback = false;
284
285  // First look for preindexed address forms:
286  //  [Rn, +/-Rm]
287  //  [Rn, #offset]
288  //  [Rn, +/-Rm, shift]
289  // that is after the "[Rn" we now have see if the next token is a comma.
290  const AsmToken &Tok = getLexer().getTok();
291  if (Tok.is(AsmToken::Comma)) {
292    Preindexed = true;
293    getLexer().Lex(); // Eat comma token.
294
295    const AsmToken &NextTok = getLexer().getTok();
296    if (NextTok.is(AsmToken::Plus))
297      getLexer().Lex(); // Eat plus token.
298    else if (NextTok.is(AsmToken::Minus)) {
299      Negative = true;
300      getLexer().Lex(); // Eat minus token
301    }
302
303    // See if there is a register following the "[Rn," we have so far.
304    const AsmToken &OffsetRegTok = getLexer().getTok();
305    int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
306    bool OffsetRegShifted = false;
307    enum ShiftType ShiftType;
308    const MCExpr *ShiftAmount;
309    const MCExpr *Offset;
310    if (OffsetRegNum != -1) {
311      OffsetIsReg = true;
312      getLexer().Lex(); // Eat identifier token for the offset register.
313      // Look for a comma then a shift
314      const AsmToken &Tok = getLexer().getTok();
315      if (Tok.is(AsmToken::Comma)) {
316        getLexer().Lex(); // Eat comma token.
317
318        const AsmToken &Tok = getLexer().getTok();
319        if (ParseShift(&ShiftType, ShiftAmount))
320          return Error(Tok.getLoc(), "shift expected");
321        OffsetRegShifted = true;
322      }
323    }
324    else { // "[Rn," we have so far was not followed by "Rm"
325      // Look for #offset following the "[Rn,"
326      const AsmToken &HashTok = getLexer().getTok();
327      if (HashTok.isNot(AsmToken::Hash))
328        return Error(HashTok.getLoc(), "'#' expected");
329      getLexer().Lex(); // Eat hash token.
330
331      if (getParser().ParseExpression(Offset))
332       return true;
333    }
334    const AsmToken &RBracTok = getLexer().getTok();
335    if (RBracTok.isNot(AsmToken::RBrac))
336      return Error(RBracTok.getLoc(), "']' expected");
337    getLexer().Lex(); // Eat right bracket token.
338
339    const AsmToken &ExclaimTok = getLexer().getTok();
340    if (ExclaimTok.is(AsmToken::Exclaim)) {
341      Writeback = true;
342      getLexer().Lex(); // Eat exclaim token
343    }
344    Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
345                               OffsetRegShifted, ShiftType, ShiftAmount,
346                               Preindexed, Postindexed, Negative, Writeback);
347    return false;
348  }
349  // The "[Rn" we have so far was not followed by a comma.
350  else if (Tok.is(AsmToken::RBrac)) {
351    // This is a post indexing addressing forms:
352    //  [Rn], #offset
353    //  [Rn], +/-Rm
354    //  [Rn], +/-Rm, shift
355    // that is a ']' follows after the "[Rn".
356    Postindexed = true;
357    Writeback = true;
358    getLexer().Lex(); // Eat right bracket token.
359
360    const AsmToken &CommaTok = getLexer().getTok();
361    if (CommaTok.isNot(AsmToken::Comma))
362      return Error(CommaTok.getLoc(), "',' expected");
363    getLexer().Lex(); // Eat comma token.
364
365    const AsmToken &NextTok = getLexer().getTok();
366    if (NextTok.is(AsmToken::Plus))
367      getLexer().Lex(); // Eat plus token.
368    else if (NextTok.is(AsmToken::Minus)) {
369      Negative = true;
370      getLexer().Lex(); // Eat minus token
371    }
372
373    // See if there is a register following the "[Rn]," we have so far.
374    const AsmToken &OffsetRegTok = getLexer().getTok();
375    int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString());
376    bool OffsetRegShifted = false;
377    enum ShiftType ShiftType;
378    const MCExpr *ShiftAmount;
379    const MCExpr *Offset;
380    if (OffsetRegNum != -1) {
381      OffsetIsReg = true;
382      getLexer().Lex(); // Eat identifier token for the offset register.
383      // Look for a comma then a shift
384      const AsmToken &Tok = getLexer().getTok();
385      if (Tok.is(AsmToken::Comma)) {
386        getLexer().Lex(); // Eat comma token.
387
388        const AsmToken &Tok = getLexer().getTok();
389        if (ParseShift(&ShiftType, ShiftAmount))
390          return Error(Tok.getLoc(), "shift expected");
391        OffsetRegShifted = true;
392      }
393    }
394    else { // "[Rn]," we have so far was not followed by "Rm"
395      // Look for #offset following the "[Rn],"
396      const AsmToken &HashTok = getLexer().getTok();
397      if (HashTok.isNot(AsmToken::Hash))
398        return Error(HashTok.getLoc(), "'#' expected");
399      getLexer().Lex(); // Eat hash token.
400
401      if (getParser().ParseExpression(Offset))
402       return true;
403    }
404    Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
405                               OffsetRegShifted, ShiftType, ShiftAmount,
406                               Preindexed, Postindexed, Negative, Writeback);
407    return false;
408  }
409
410  return true;
411}
412
413/// ParseShift as one of these two:
414///   ( lsl | lsr | asr | ror ) , # shift_amount
415///   rrx
416/// and returns true if it parses a shift otherwise it returns false.
417bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *ShiftAmount) {
418  const AsmToken &Tok = getLexer().getTok();
419  if (Tok.isNot(AsmToken::Identifier))
420    return true;
421  const StringRef &ShiftName = Tok.getString();
422  if (ShiftName == "lsl" || ShiftName == "LSL")
423    *St = Lsl;
424  else if (ShiftName == "lsr" || ShiftName == "LSR")
425    *St = Lsr;
426  else if (ShiftName == "asr" || ShiftName == "ASR")
427    *St = Asr;
428  else if (ShiftName == "ror" || ShiftName == "ROR")
429    *St = Ror;
430  else if (ShiftName == "rrx" || ShiftName == "RRX")
431    *St = Rrx;
432  else
433    return true;
434  getLexer().Lex(); // Eat shift type token.
435
436  // For all but a Rotate right there must be a '#' and a shift amount
437  if (*St != Rrx) {
438    // Look for # following the shift type
439    const AsmToken &HashTok = getLexer().getTok();
440    if (HashTok.isNot(AsmToken::Hash))
441      return Error(HashTok.getLoc(), "'#' expected");
442    getLexer().Lex(); // Eat hash token.
443
444    if (getParser().ParseExpression(ShiftAmount))
445      return true;
446  }
447
448  return false;
449}
450
451// A hack to allow some testing
452int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
453  if (Name == "r0" || Name == "R0")
454    return 0;
455  else if (Name == "r1" || Name == "R1")
456    return 1;
457  else if (Name == "r2" || Name == "R2")
458    return 2;
459  else if (Name == "r3" || Name == "R3")
460    return 3;
461  else if (Name == "r3" || Name == "R3")
462    return 3;
463  else if (Name == "r4" || Name == "R4")
464    return 4;
465  else if (Name == "r5" || Name == "R5")
466    return 5;
467  else if (Name == "r6" || Name == "R6")
468    return 6;
469  else if (Name == "r7" || Name == "R7")
470    return 7;
471  else if (Name == "r8" || Name == "R8")
472    return 8;
473  else if (Name == "r9" || Name == "R9")
474    return 9;
475  else if (Name == "r10" || Name == "R10")
476    return 10;
477  else if (Name == "r11" || Name == "R11" || Name == "fp")
478    return 11;
479  else if (Name == "r12" || Name == "R12" || Name == "ip")
480    return 12;
481  else if (Name == "r13" || Name == "R13" || Name == "sp")
482    return 13;
483  else if (Name == "r14" || Name == "R14" || Name == "lr")
484      return 14;
485  else if (Name == "r15" || Name == "R15" || Name == "pc")
486    return 15;
487  return -1;
488}
489
490// A hack to allow some testing
491bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
492                                    MCInst &Inst) {
493  struct ARMOperand Op0 = Operands[0];
494  assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
495  const StringRef &Mnemonic = Op0.getToken();
496  if (Mnemonic == "add" ||
497      Mnemonic == "stmfd" ||
498      Mnemonic == "str" ||
499      Mnemonic == "ldmfd" ||
500      Mnemonic == "ldr" ||
501      Mnemonic == "mov")
502    return false;
503
504  return true;
505}
506
507// TODO - this is a work in progress
508bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
509  switch (getLexer().getKind()) {
510  case AsmToken::Identifier:
511    if (!ParseRegister(Op))
512      return false;
513    // TODO parse other operands that start with an identifier like labels
514    return Error(getLexer().getTok().getLoc(), "labels not yet supported");
515  case AsmToken::LBrac:
516    if (!ParseMemory(Op))
517      return false;
518  case AsmToken::LCurly:
519    if (!ParseRegisterList(Op))
520      return(false);
521  case AsmToken::Hash:
522    return Error(getLexer().getTok().getLoc(), "immediates not yet supported");
523  default:
524    return Error(getLexer().getTok().getLoc(), "unexpected token in operand");
525  }
526}
527
528bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
529  SmallVector<ARMOperand, 7> Operands;
530
531  Operands.push_back(ARMOperand::CreateToken(Name));
532
533  SMLoc Loc = getLexer().getTok().getLoc();
534  if (getLexer().isNot(AsmToken::EndOfStatement)) {
535
536    // Read the first operand.
537    Operands.push_back(ARMOperand());
538    if (ParseOperand(Operands.back()))
539      return true;
540
541    while (getLexer().is(AsmToken::Comma)) {
542      getLexer().Lex();  // Eat the comma.
543
544      // Parse and remember the operand.
545      Operands.push_back(ARMOperand());
546      if (ParseOperand(Operands.back()))
547        return true;
548    }
549  }
550  if (!MatchInstruction(Operands, Inst))
551    return false;
552
553  Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
554  return true;
555}
556
557bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
558  StringRef IDVal = DirectiveID.getIdentifier();
559  if (IDVal == ".word")
560    return ParseDirectiveWord(4, DirectiveID.getLoc());
561  return true;
562}
563
564/// ParseDirectiveWord
565///  ::= .word [ expression (, expression)* ]
566bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
567  if (getLexer().isNot(AsmToken::EndOfStatement)) {
568    for (;;) {
569      const MCExpr *Value;
570      if (getParser().ParseExpression(Value))
571        return true;
572
573      getParser().getStreamer().EmitValue(Value, Size);
574
575      if (getLexer().is(AsmToken::EndOfStatement))
576        break;
577
578      // FIXME: Improve diagnostic.
579      if (getLexer().isNot(AsmToken::Comma))
580        return Error(L, "unexpected token in directive");
581      getLexer().Lex();
582    }
583  }
584
585  getLexer().Lex();
586  return false;
587}
588
589// Force static initialization.
590extern "C" void LLVMInitializeARMAsmParser() {
591  RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
592  RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
593}
594