MCAsmLexer.h revision 7b6f2034ac355bd3b3cc88960bf8d0e694fe3db4
//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCASMLEXER_H
11#define LLVM_MC_MCASMLEXER_H
12
13#include "llvm/ADT/StringRef.h"
14#include "llvm/Support/Compiler.h"
15#include "llvm/Support/DataTypes.h"
16#include "llvm/Support/SMLoc.h"
17
18namespace llvm {
19
20/// AsmToken - Target independent representation for an assembler token.
21class AsmToken {
22public:
23  enum TokenKind {
24    // Markers
25    Eof, Error,
26
27    // String values.
28    Identifier,
29    String,
30
31    // Integer values.
32    Integer,
33
34    // Real values.
35    Real,
36
37    // Register values (stored in IntVal).  Only used by MCTargetAsmLexer.
38    Register,
39
40    // No-value.
41    EndOfStatement,
42    Colon,
43    Space,
44    Plus, Minus, Tilde,
45    Slash,    // '/'
46    BackSlash, // '\'
47    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
48    Star, Dot, Comma, Dollar, Equal, EqualEqual,
49
50    Pipe, PipePipe, Caret,
51    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
52    Less, LessEqual, LessLess, LessGreater,
53    Greater, GreaterEqual, GreaterGreater, At
54  };
55
56private:
57  TokenKind Kind;
58
59  /// A reference to the entire token contents; this is always a pointer into
60  /// a memory buffer owned by the source manager.
61  StringRef Str;
62
63  int64_t IntVal;
64
65public:
66  AsmToken() {}
67  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
68    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
69
70  TokenKind getKind() const { return Kind; }
71  bool is(TokenKind K) const { return Kind == K; }
72  bool isNot(TokenKind K) const { return Kind != K; }
73
74  SMLoc getLoc() const;
75  SMLoc getEndLoc() const;
76
77  /// getStringContents - Get the contents of a string token (without quotes).
78  StringRef getStringContents() const {
79    assert(Kind == String && "This token isn't a string!");
80    return Str.slice(1, Str.size() - 1);
81  }
82
83  /// getIdentifier - Get the identifier string for the current token, which
84  /// should be an identifier or a string. This gets the portion of the string
85  /// which should be used as the identifier, e.g., it does not include the
86  /// quotes on strings.
87  StringRef getIdentifier() const {
88    if (Kind == Identifier)
89      return getString();
90    return getStringContents();
91  }
92
93  /// getString - Get the string for the current token, this includes all
94  /// characters (for example, the quotes on strings) in the token.
95  ///
96  /// The returned StringRef points into the source manager's memory buffer, and
97  /// is safe to store across calls to Lex().
98  StringRef getString() const { return Str; }
99
100  // FIXME: Don't compute this in advance, it makes every token larger, and is
101  // also not generally what we want (it is nicer for recovery etc. to lex 123br
102  // as a single token, then diagnose as an invalid number).
103  int64_t getIntVal() const {
104    assert(Kind == Integer && "This token isn't an integer!");
105    return IntVal;
106  }
107
108  /// getRegVal - Get the register number for the current token, which should
109  /// be a register.
110  unsigned getRegVal() const {
111    assert(Kind == Register && "This token isn't a register!");
112    return static_cast<unsigned>(IntVal);
113  }
114};
115
116/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
117/// assembly lexers.
118class MCAsmLexer {
119  /// The current token, stored in the base class for faster access.
120  AsmToken CurTok;
121
122  /// The location and description of the current error
123  SMLoc ErrLoc;
124  std::string Err;
125
126  MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
127  void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
128protected: // Can only create subclasses.
129  const char *TokStart;
130  bool SkipSpace;
131
132  MCAsmLexer();
133
134  virtual AsmToken LexToken() = 0;
135
136  void SetError(const SMLoc &errLoc, const std::string &err) {
137    ErrLoc = errLoc;
138    Err = err;
139  }
140
141public:
142  virtual ~MCAsmLexer();
143
144  /// Lex - Consume the next token from the input stream and return it.
145  ///
146  /// The lexer will continuosly return the end-of-file token once the end of
147  /// the main input file has been reached.
148  const AsmToken &Lex() {
149    return CurTok = LexToken();
150  }
151
152  virtual StringRef LexUntilEndOfStatement() = 0;
153
154  /// getLoc - Get the current source location.
155  SMLoc getLoc() const;
156
157  /// getTok - Get the current (last) lexed token.
158  const AsmToken &getTok() {
159    return CurTok;
160  }
161
162  /// getErrLoc - Get the current error location
163  const SMLoc &getErrLoc() {
164    return ErrLoc;
165  }
166
167  /// getErr - Get the current error string
168  const std::string &getErr() {
169    return Err;
170  }
171
172  /// getKind - Get the kind of current token.
173  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
174
175  /// is - Check if the current token has kind \p K.
176  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
177
178  /// isNot - Check if the current token has kind \p K.
179  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
180
181  /// setSkipSpace - Set whether spaces should be ignored by the lexer
182  void setSkipSpace(bool val) { SkipSpace = val; }
183};
184
185} // End llvm namespace
186
187#endif
188