1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11#define LLVM_MC_MCPARSER_MCASMLEXER_H
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/Compiler.h"
16#include "llvm/Support/DataTypes.h"
17#include "llvm/Support/SMLoc.h"
18
19namespace llvm {
20
21/// Target independent representation for an assembler token.
22class AsmToken {
23public:
24  enum TokenKind {
25    // Markers
26    Eof, Error,
27
28    // String values.
29    Identifier,
30    String,
31
32    // Integer values.
33    Integer,
34    BigNum, // larger than 64 bits
35
36    // Real values.
37    Real,
38
39    // No-value.
40    EndOfStatement,
41    Colon,
42    Space,
43    Plus, Minus, Tilde,
44    Slash,    // '/'
45    BackSlash, // '\'
46    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
47    Star, Dot, Comma, Dollar, Equal, EqualEqual,
48
49    Pipe, PipePipe, Caret,
50    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
51    Less, LessEqual, LessLess, LessGreater,
52    Greater, GreaterEqual, GreaterGreater, At
53  };
54
55private:
56  TokenKind Kind;
57
58  /// A reference to the entire token contents; this is always a pointer into
59  /// a memory buffer owned by the source manager.
60  StringRef Str;
61
62  APInt IntVal;
63
64public:
65  AsmToken() {}
66  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
67      : Kind(Kind), Str(Str), IntVal(IntVal) {}
68  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
69      : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
70
71  TokenKind getKind() const { return Kind; }
72  bool is(TokenKind K) const { return Kind == K; }
73  bool isNot(TokenKind K) const { return Kind != K; }
74
75  SMLoc getLoc() const;
76  SMLoc getEndLoc() const;
77  SMRange getLocRange() const;
78
79  /// Get the contents of a string token (without quotes).
80  StringRef getStringContents() const {
81    assert(Kind == String && "This token isn't a string!");
82    return Str.slice(1, Str.size() - 1);
83  }
84
85  /// Get the identifier string for the current token, which should be an
86  /// identifier or a string. This gets the portion of the string which should
87  /// be used as the identifier, e.g., it does not include the quotes on
88  /// strings.
89  StringRef getIdentifier() const {
90    if (Kind == Identifier)
91      return getString();
92    return getStringContents();
93  }
94
95  /// Get the string for the current token, this includes all characters (for
96  /// example, the quotes on strings) in the token.
97  ///
98  /// The returned StringRef points into the source manager's memory buffer, and
99  /// is safe to store across calls to Lex().
100  StringRef getString() const { return Str; }
101
102  // FIXME: Don't compute this in advance, it makes every token larger, and is
103  // also not generally what we want (it is nicer for recovery etc. to lex 123br
104  // as a single token, then diagnose as an invalid number).
105  int64_t getIntVal() const {
106    assert(Kind == Integer && "This token isn't an integer!");
107    return IntVal.getZExtValue();
108  }
109
110  APInt getAPIntVal() const {
111    assert((Kind == Integer || Kind == BigNum) &&
112           "This token isn't an integer!");
113    return IntVal;
114  }
115};
116
117/// Generic assembler lexer interface, for use by target specific assembly
118/// lexers.
119class MCAsmLexer {
120  /// The current token, stored in the base class for faster access.
121  AsmToken CurTok;
122
123  /// The location and description of the current error
124  SMLoc ErrLoc;
125  std::string Err;
126
127  MCAsmLexer(const MCAsmLexer &) = delete;
128  void operator=(const MCAsmLexer &) = delete;
129protected: // Can only create subclasses.
130  const char *TokStart;
131  bool SkipSpace;
132  bool AllowAtInIdentifier;
133
134  MCAsmLexer();
135
136  virtual AsmToken LexToken() = 0;
137
138  void SetError(const SMLoc &errLoc, const std::string &err) {
139    ErrLoc = errLoc;
140    Err = err;
141  }
142
143public:
144  virtual ~MCAsmLexer();
145
146  /// Consume the next token from the input stream and return it.
147  ///
148  /// The lexer will continuosly return the end-of-file token once the end of
149  /// the main input file has been reached.
150  const AsmToken &Lex() {
151    return CurTok = LexToken();
152  }
153
154  virtual StringRef LexUntilEndOfStatement() = 0;
155
156  /// Get the current source location.
157  SMLoc getLoc() const;
158
159  /// Get the current (last) lexed token.
160  const AsmToken &getTok() const {
161    return CurTok;
162  }
163
164  /// Look ahead at the next token to be lexed.
165  virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
166
167  /// Get the current error location
168  const SMLoc &getErrLoc() {
169    return ErrLoc;
170  }
171
172  /// Get the current error string
173  const std::string &getErr() {
174    return Err;
175  }
176
177  /// Get the kind of current token.
178  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
179
180  /// Check if the current token has kind \p K.
181  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
182
183  /// Check if the current token has kind \p K.
184  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
185
186  /// Set whether spaces should be ignored by the lexer
187  void setSkipSpace(bool val) { SkipSpace = val; }
188
189  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
190  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
191};
192
193} // End llvm namespace
194
195#endif
196