MCAsmLexer.h revision 19ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5b
1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_MC_MCASMLEXER_H
11#define LLVM_MC_MCASMLEXER_H
12
13#include "llvm/ADT/StringRef.h"
14#include "llvm/System/DataTypes.h"
15#include "llvm/Support/SMLoc.h"
16
17namespace llvm {
18class MCAsmLexer;
19class MCInst;
20class Target;
21
22/// AsmToken - Target independent representation for an assembler token.
23class AsmToken {
24public:
25  enum TokenKind {
26    // Markers
27    Eof, Error,
28
29    // String values.
30    Identifier,
31    String,
32
33    // Integer values.
34    Integer,
35
36    // Register values (stored in IntVal).  Only used by TargetAsmLexer.
37    Register,
38
39    // No-value.
40    EndOfStatement,
41    Colon,
42    Plus, Minus, Tilde,
43    Slash,    // '/'
44    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
45    Star, Dot, Comma, Dollar, Equal, EqualEqual,
46
47    Pipe, PipePipe, Caret,
48    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
49    Less, LessEqual, LessLess, LessGreater,
50    Greater, GreaterEqual, GreaterGreater, At
51  };
52
53  TokenKind Kind;
54
55  /// A reference to the entire token contents; this is always a pointer into
56  /// a memory buffer owned by the source manager.
57  StringRef Str;
58
59  int64_t IntVal;
60
61public:
62  AsmToken() {}
63  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
64    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
65
66  TokenKind getKind() const { return Kind; }
67  bool is(TokenKind K) const { return Kind == K; }
68  bool isNot(TokenKind K) const { return Kind != K; }
69
70  SMLoc getLoc() const;
71
72  /// getStringContents - Get the contents of a string token (without quotes).
73  StringRef getStringContents() const {
74    assert(Kind == String && "This token isn't a string!");
75    return Str.slice(1, Str.size() - 1);
76  }
77
78  /// getIdentifier - Get the identifier string for the current token, which
79  /// should be an identifier or a string. This gets the portion of the string
80  /// which should be used as the identifier, e.g., it does not include the
81  /// quotes on strings.
82  StringRef getIdentifier() const {
83    if (Kind == Identifier)
84      return getString();
85    return getStringContents();
86  }
87
88  /// getString - Get the string for the current token, this includes all
89  /// characters (for example, the quotes on strings) in the token.
90  ///
91  /// The returned StringRef points into the source manager's memory buffer, and
92  /// is safe to store across calls to Lex().
93  StringRef getString() const { return Str; }
94
95  // FIXME: Don't compute this in advance, it makes every token larger, and is
96  // also not generally what we want (it is nicer for recovery etc. to lex 123br
97  // as a single token, then diagnose as an invalid number).
98  int64_t getIntVal() const {
99    assert(Kind == Integer && "This token isn't an integer!");
100    return IntVal;
101  }
102
103  /// getRegVal - Get the register number for the current token, which should
104  /// be a register.
105  unsigned getRegVal() const {
106    assert(Kind == Register && "This token isn't a register!");
107    return static_cast<unsigned>(IntVal);
108  }
109};
110
111/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
112/// assembly lexers.
113class MCAsmLexer {
114  /// The current token, stored in the base class for faster access.
115  AsmToken CurTok;
116
117  /// The location and description of the current error
118  SMLoc ErrLoc;
119  std::string Err;
120
121  MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
122  void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
123protected: // Can only create subclasses.
124  const char *TokStart;
125
126  MCAsmLexer();
127
128  virtual AsmToken LexToken() = 0;
129
130  void SetError(const SMLoc &errLoc, const std::string &err) {
131    ErrLoc = errLoc;
132    Err = err;
133  }
134
135public:
136  virtual ~MCAsmLexer();
137
138  /// Lex - Consume the next token from the input stream and return it.
139  ///
140  /// The lexer will continuosly return the end-of-file token once the end of
141  /// the main input file has been reached.
142  const AsmToken &Lex() {
143    return CurTok = LexToken();
144  }
145
146  virtual StringRef LexUntilEndOfStatement() = 0;
147
148  /// getLoc - Get the current source location.
149  SMLoc getLoc() const;
150
151  /// getTok - Get the current (last) lexed token.
152  const AsmToken &getTok() {
153    return CurTok;
154  }
155
156  /// getErrLoc - Get the current error location
157  const SMLoc &getErrLoc() {
158    return ErrLoc;
159  }
160
161  /// getErr - Get the current error string
162  const std::string &getErr() {
163    return Err;
164  }
165
166  /// getKind - Get the kind of current token.
167  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
168
169  /// is - Check if the current token has kind \arg K.
170  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
171
172  /// isNot - Check if the current token has kind \arg K.
173  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
174};
175
176} // End llvm namespace
177
178#endif
179