MCAsmLexer.h revision ac29c0af3da1bc1da414899a78ffb0a2360f8cda
1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCASMLEXER_H
11#define LLVM_MC_MCASMLEXER_H
12
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/DataTypes.h"
15#include "llvm/Support/SMLoc.h"
16
17namespace llvm {
18class MCAsmLexer;
19class MCInst;
20
21/// AsmToken - Target independent representation for an assembler token.
22class AsmToken {
23public:
24  enum TokenKind {
25    // Markers
26    Eof, Error,
27
28    // String values.
29    Identifier,
30    String,
31
32    // Integer values.
33    Integer,
34
35    // Real values.
36    Real,
37
38    // Register values (stored in IntVal).  Only used by MCTargetAsmLexer.
39    Register,
40
41    // No-value.
42    EndOfStatement,
43    Colon,
44    Plus, Minus, Tilde,
45    Slash,    // '/'
46    BackSlash, // '\'
47    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
48    Star, Dot, Comma, Dollar, Equal, EqualEqual,
49
50    Pipe, PipePipe, Caret,
51    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
52    Less, LessEqual, LessLess, LessGreater,
53    Greater, GreaterEqual, GreaterGreater, At
54  };
55
56  TokenKind Kind;
57
58  /// A reference to the entire token contents; this is always a pointer into
59  /// a memory buffer owned by the source manager.
60  StringRef Str;
61
62  int64_t IntVal;
63
64public:
65  AsmToken() {}
66  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
67    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
68
69  TokenKind getKind() const { return Kind; }
70  bool is(TokenKind K) const { return Kind == K; }
71  bool isNot(TokenKind K) const { return Kind != K; }
72
73  SMLoc getLoc() const;
74
75  /// getStringContents - Get the contents of a string token (without quotes).
76  StringRef getStringContents() const {
77    assert(Kind == String && "This token isn't a string!");
78    return Str.slice(1, Str.size() - 1);
79  }
80
81  /// getIdentifier - Get the identifier string for the current token, which
82  /// should be an identifier or a string. This gets the portion of the string
83  /// which should be used as the identifier, e.g., it does not include the
84  /// quotes on strings.
85  StringRef getIdentifier() const {
86    if (Kind == Identifier)
87      return getString();
88    return getStringContents();
89  }
90
91  /// getString - Get the string for the current token, this includes all
92  /// characters (for example, the quotes on strings) in the token.
93  ///
94  /// The returned StringRef points into the source manager's memory buffer, and
95  /// is safe to store across calls to Lex().
96  StringRef getString() const { return Str; }
97
98  // FIXME: Don't compute this in advance, it makes every token larger, and is
99  // also not generally what we want (it is nicer for recovery etc. to lex 123br
100  // as a single token, then diagnose as an invalid number).
101  int64_t getIntVal() const {
102    assert(Kind == Integer && "This token isn't an integer!");
103    return IntVal;
104  }
105
106  /// getRegVal - Get the register number for the current token, which should
107  /// be a register.
108  unsigned getRegVal() const {
109    assert(Kind == Register && "This token isn't a register!");
110    return static_cast<unsigned>(IntVal);
111  }
112};
113
114/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
115/// assembly lexers.
116class MCAsmLexer {
117  /// The current token, stored in the base class for faster access.
118  AsmToken CurTok;
119
120  /// The location and description of the current error
121  SMLoc ErrLoc;
122  std::string Err;
123
124  MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
125  void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
126protected: // Can only create subclasses.
127  const char *TokStart;
128
129  MCAsmLexer();
130
131  virtual AsmToken LexToken() = 0;
132
133  void SetError(const SMLoc &errLoc, const std::string &err) {
134    ErrLoc = errLoc;
135    Err = err;
136  }
137
138public:
139  virtual ~MCAsmLexer();
140
141  /// Lex - Consume the next token from the input stream and return it.
142  ///
143  /// The lexer will continuosly return the end-of-file token once the end of
144  /// the main input file has been reached.
145  const AsmToken &Lex() {
146    return CurTok = LexToken();
147  }
148
149  virtual StringRef LexUntilEndOfStatement() = 0;
150
151  /// getLoc - Get the current source location.
152  SMLoc getLoc() const;
153
154  /// getTok - Get the current (last) lexed token.
155  const AsmToken &getTok() {
156    return CurTok;
157  }
158
159  /// getErrLoc - Get the current error location
160  const SMLoc &getErrLoc() {
161    return ErrLoc;
162  }
163
164  /// getErr - Get the current error string
165  const std::string &getErr() {
166    return Err;
167  }
168
169  /// getKind - Get the kind of current token.
170  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
171
172  /// is - Check if the current token has kind \arg K.
173  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
174
175  /// isNot - Check if the current token has kind \arg K.
176  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
177};
178
179} // End llvm namespace
180
181#endif
182