MCAsmLexer.h revision 653664471333f316020e96dd3d664f4984f66a65
1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCASMLEXER_H
11#define LLVM_MC_MCASMLEXER_H
12
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <string>
16
17namespace llvm {
18class MCAsmLexer;
19class MCInst;
20class Target;
21
22/// AsmToken - Target independent representation for an assembler token.
23class AsmToken {
24public:
25  enum TokenKind {
26    // Markers
27    Eof, Error,
28
29    // String values.
30    Identifier,
31    String,
32
33    // Integer values.
34    Integer,
35
36    // Real values.
37    Real,
38
39    // Register values (stored in IntVal).  Only used by TargetAsmLexer.
40    Register,
41
42    // No-value.
43    EndOfStatement,
44    Colon,
45    Plus, Minus, Tilde,
46    Slash,    // '/'
47    BackSlash, // '\'
48    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
49    Star, Dot, Comma, Dollar, Equal, EqualEqual,
50
51    Pipe, PipePipe, Caret,
52    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
53    Less, LessEqual, LessLess, LessGreater,
54    Greater, GreaterEqual, GreaterGreater, At
55  };
56
57  TokenKind Kind;
58
59  /// A reference to the entire token contents; this is always a pointer into
60  /// a memory buffer owned by the source manager.
61  StringRef Str;
62
63  int64_t IntVal;
64
65public:
66  AsmToken() {}
67  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
68    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
69
70  TokenKind getKind() const { return Kind; }
71  bool is(TokenKind K) const { return Kind == K; }
72  bool isNot(TokenKind K) const { return Kind != K; }
73
74  SMLoc getLoc() const;
75
76  /// getStringContents - Get the contents of a string token (without quotes).
77  StringRef getStringContents() const {
78    assert(Kind == String && "This token isn't a string!");
79    return Str.slice(1, Str.size() - 1);
80  }
81
82  /// getIdentifier - Get the identifier string for the current token, which
83  /// should be an identifier or a string. This gets the portion of the string
84  /// which should be used as the identifier, e.g., it does not include the
85  /// quotes on strings.
86  StringRef getIdentifier() const {
87    if (Kind == Identifier)
88      return getString();
89    return getStringContents();
90  }
91
92  /// getString - Get the string for the current token, this includes all
93  /// characters (for example, the quotes on strings) in the token.
94  ///
95  /// The returned StringRef points into the source manager's memory buffer, and
96  /// is safe to store across calls to Lex().
97  StringRef getString() const { return Str; }
98
99  // FIXME: Don't compute this in advance, it makes every token larger, and is
100  // also not generally what we want (it is nicer for recovery etc. to lex 123br
101  // as a single token, then diagnose as an invalid number).
102  int64_t getIntVal() const {
103    assert(Kind == Integer && "This token isn't an integer!");
104    return IntVal;
105  }
106
107  /// getRegVal - Get the register number for the current token, which should
108  /// be a register.
109  unsigned getRegVal() const {
110    assert(Kind == Register && "This token isn't a register!");
111    return static_cast<unsigned>(IntVal);
112  }
113};
114
115/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
116/// assembly lexers.
117class MCAsmLexer {
118  /// The current token, stored in the base class for faster access.
119  AsmToken CurTok;
120
121  /// The location and description of the current error
122  SMLoc ErrLoc;
123  std::string Err;
124
125  MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
126  void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
127protected: // Can only create subclasses.
128  const char *TokStart;
129
130  MCAsmLexer();
131
132  virtual AsmToken LexToken() = 0;
133
134  void SetError(const SMLoc &errLoc, const std::string &err) {
135    ErrLoc = errLoc;
136    Err = err;
137  }
138
139public:
140  virtual ~MCAsmLexer();
141
142  /// Lex - Consume the next token from the input stream and return it.
143  ///
144  /// The lexer will continuosly return the end-of-file token once the end of
145  /// the main input file has been reached.
146  const AsmToken &Lex() {
147    return CurTok = LexToken();
148  }
149
150  virtual StringRef LexUntilEndOfStatement() = 0;
151
152  /// getLoc - Get the current source location.
153  SMLoc getLoc() const;
154
155  /// getTok - Get the current (last) lexed token.
156  const AsmToken &getTok() {
157    return CurTok;
158  }
159
160  /// getErrLoc - Get the current error location
161  const SMLoc &getErrLoc() {
162    return ErrLoc;
163  }
164
165  /// getErr - Get the current error string
166  const std::string &getErr() {
167    return Err;
168  }
169
170  /// getKind - Get the kind of current token.
171  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
172
173  /// is - Check if the current token has kind \arg K.
174  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
175
176  /// isNot - Check if the current token has kind \arg K.
177  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
178};
179
180} // End llvm namespace
181
182#endif
183