1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11#define LLVM_MC_MCPARSER_MCASMLEXER_H
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/Compiler.h"
16#include "llvm/Support/DataTypes.h"
17#include "llvm/Support/SMLoc.h"
18
19namespace llvm {
20
21/// AsmToken - Target independent representation for an assembler token.
22class AsmToken {
23public:
24  enum TokenKind {
25    // Markers
26    Eof, Error,
27
28    // String values.
29    Identifier,
30    String,
31
32    // Integer values.
33    Integer,
34    BigNum, // larger than 64 bits
35
36    // Real values.
37    Real,
38
39    // No-value.
40    EndOfStatement,
41    Colon,
42    Space,
43    Plus, Minus, Tilde,
44    Slash,    // '/'
45    BackSlash, // '\'
46    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
47    Star, Dot, Comma, Dollar, Equal, EqualEqual,
48
49    Pipe, PipePipe, Caret,
50    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
51    Less, LessEqual, LessLess, LessGreater,
52    Greater, GreaterEqual, GreaterGreater, At
53  };
54
55private:
56  TokenKind Kind;
57
58  /// A reference to the entire token contents; this is always a pointer into
59  /// a memory buffer owned by the source manager.
60  StringRef Str;
61
62  APInt IntVal;
63
64public:
65  AsmToken() {}
66  AsmToken(TokenKind _Kind, StringRef _Str, APInt _IntVal)
67    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
68  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
69    : Kind(_Kind), Str(_Str), IntVal(64, _IntVal, true) {}
70
71  TokenKind getKind() const { return Kind; }
72  bool is(TokenKind K) const { return Kind == K; }
73  bool isNot(TokenKind K) const { return Kind != K; }
74
75  SMLoc getLoc() const;
76  SMLoc getEndLoc() const;
77
78  /// getStringContents - Get the contents of a string token (without quotes).
79  StringRef getStringContents() const {
80    assert(Kind == String && "This token isn't a string!");
81    return Str.slice(1, Str.size() - 1);
82  }
83
84  /// getIdentifier - Get the identifier string for the current token, which
85  /// should be an identifier or a string. This gets the portion of the string
86  /// which should be used as the identifier, e.g., it does not include the
87  /// quotes on strings.
88  StringRef getIdentifier() const {
89    if (Kind == Identifier)
90      return getString();
91    return getStringContents();
92  }
93
94  /// getString - Get the string for the current token, this includes all
95  /// characters (for example, the quotes on strings) in the token.
96  ///
97  /// The returned StringRef points into the source manager's memory buffer, and
98  /// is safe to store across calls to Lex().
99  StringRef getString() const { return Str; }
100
101  // FIXME: Don't compute this in advance, it makes every token larger, and is
102  // also not generally what we want (it is nicer for recovery etc. to lex 123br
103  // as a single token, then diagnose as an invalid number).
104  int64_t getIntVal() const {
105    assert(Kind == Integer && "This token isn't an integer!");
106    return IntVal.getZExtValue();
107  }
108
109  APInt getAPIntVal() const {
110    assert((Kind == Integer || Kind == BigNum) &&
111           "This token isn't an integer!");
112    return IntVal;
113  }
114};
115
116/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
117/// assembly lexers.
118class MCAsmLexer {
119  /// The current token, stored in the base class for faster access.
120  AsmToken CurTok;
121
122  /// The location and description of the current error
123  SMLoc ErrLoc;
124  std::string Err;
125
126  MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
127  void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
128protected: // Can only create subclasses.
129  const char *TokStart;
130  bool SkipSpace;
131  bool AllowAtInIdentifier;
132
133  MCAsmLexer();
134
135  virtual AsmToken LexToken() = 0;
136
137  void SetError(const SMLoc &errLoc, const std::string &err) {
138    ErrLoc = errLoc;
139    Err = err;
140  }
141
142public:
143  virtual ~MCAsmLexer();
144
145  /// Lex - Consume the next token from the input stream and return it.
146  ///
147  /// The lexer will continuosly return the end-of-file token once the end of
148  /// the main input file has been reached.
149  const AsmToken &Lex() {
150    return CurTok = LexToken();
151  }
152
153  virtual StringRef LexUntilEndOfStatement() = 0;
154
155  /// getLoc - Get the current source location.
156  SMLoc getLoc() const;
157
158  /// getTok - Get the current (last) lexed token.
159  const AsmToken &getTok() {
160    return CurTok;
161  }
162
163  /// peekTok - Look ahead at the next token to be lexed.
164  virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
165
166  /// getErrLoc - Get the current error location
167  const SMLoc &getErrLoc() {
168    return ErrLoc;
169  }
170
171  /// getErr - Get the current error string
172  const std::string &getErr() {
173    return Err;
174  }
175
176  /// getKind - Get the kind of current token.
177  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
178
179  /// is - Check if the current token has kind \p K.
180  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
181
182  /// isNot - Check if the current token has kind \p K.
183  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
184
185  /// setSkipSpace - Set whether spaces should be ignored by the lexer
186  void setSkipSpace(bool val) { SkipSpace = val; }
187
188  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
189  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
190};
191
192} // End llvm namespace
193
194#endif
195