MCAsmLexer.h revision 7b6f2034ac355bd3b3cc88960bf8d0e694fe3db4
1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef LLVM_MC_MCASMLEXER_H 11#define LLVM_MC_MCASMLEXER_H 12 13#include "llvm/ADT/StringRef.h" 14#include "llvm/Support/Compiler.h" 15#include "llvm/Support/DataTypes.h" 16#include "llvm/Support/SMLoc.h" 17 18namespace llvm { 19 20/// AsmToken - Target independent representation for an assembler token. 21class AsmToken { 22public: 23 enum TokenKind { 24 // Markers 25 Eof, Error, 26 27 // String values. 28 Identifier, 29 String, 30 31 // Integer values. 32 Integer, 33 34 // Real values. 35 Real, 36 37 // Register values (stored in IntVal). Only used by MCTargetAsmLexer. 38 Register, 39 40 // No-value. 41 EndOfStatement, 42 Colon, 43 Space, 44 Plus, Minus, Tilde, 45 Slash, // '/' 46 BackSlash, // '\' 47 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 48 Star, Dot, Comma, Dollar, Equal, EqualEqual, 49 50 Pipe, PipePipe, Caret, 51 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 52 Less, LessEqual, LessLess, LessGreater, 53 Greater, GreaterEqual, GreaterGreater, At 54 }; 55 56private: 57 TokenKind Kind; 58 59 /// A reference to the entire token contents; this is always a pointer into 60 /// a memory buffer owned by the source manager. 61 StringRef Str; 62 63 int64_t IntVal; 64 65public: 66 AsmToken() {} 67 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 68 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 69 70 TokenKind getKind() const { return Kind; } 71 bool is(TokenKind K) const { return Kind == K; } 72 bool isNot(TokenKind K) const { return Kind != K; } 73 74 SMLoc getLoc() const; 75 SMLoc getEndLoc() const; 76 77 /// getStringContents - Get the contents of a string token (without quotes). 78 StringRef getStringContents() const { 79 assert(Kind == String && "This token isn't a string!"); 80 return Str.slice(1, Str.size() - 1); 81 } 82 83 /// getIdentifier - Get the identifier string for the current token, which 84 /// should be an identifier or a string. This gets the portion of the string 85 /// which should be used as the identifier, e.g., it does not include the 86 /// quotes on strings. 87 StringRef getIdentifier() const { 88 if (Kind == Identifier) 89 return getString(); 90 return getStringContents(); 91 } 92 93 /// getString - Get the string for the current token, this includes all 94 /// characters (for example, the quotes on strings) in the token. 95 /// 96 /// The returned StringRef points into the source manager's memory buffer, and 97 /// is safe to store across calls to Lex(). 98 StringRef getString() const { return Str; } 99 100 // FIXME: Don't compute this in advance, it makes every token larger, and is 101 // also not generally what we want (it is nicer for recovery etc. to lex 123br 102 // as a single token, then diagnose as an invalid number). 103 int64_t getIntVal() const { 104 assert(Kind == Integer && "This token isn't an integer!"); 105 return IntVal; 106 } 107 108 /// getRegVal - Get the register number for the current token, which should 109 /// be a register. 110 unsigned getRegVal() const { 111 assert(Kind == Register && "This token isn't a register!"); 112 return static_cast<unsigned>(IntVal); 113 } 114}; 115 116/// MCAsmLexer - Generic assembler lexer interface, for use by target specific 117/// assembly lexers. 118class MCAsmLexer { 119 /// The current token, stored in the base class for faster access. 120 AsmToken CurTok; 121 122 /// The location and description of the current error 123 SMLoc ErrLoc; 124 std::string Err; 125 126 MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 127 void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 128protected: // Can only create subclasses. 129 const char *TokStart; 130 bool SkipSpace; 131 132 MCAsmLexer(); 133 134 virtual AsmToken LexToken() = 0; 135 136 void SetError(const SMLoc &errLoc, const std::string &err) { 137 ErrLoc = errLoc; 138 Err = err; 139 } 140 141public: 142 virtual ~MCAsmLexer(); 143 144 /// Lex - Consume the next token from the input stream and return it. 145 /// 146 /// The lexer will continuosly return the end-of-file token once the end of 147 /// the main input file has been reached. 148 const AsmToken &Lex() { 149 return CurTok = LexToken(); 150 } 151 152 virtual StringRef LexUntilEndOfStatement() = 0; 153 154 /// getLoc - Get the current source location. 155 SMLoc getLoc() const; 156 157 /// getTok - Get the current (last) lexed token. 158 const AsmToken &getTok() { 159 return CurTok; 160 } 161 162 /// getErrLoc - Get the current error location 163 const SMLoc &getErrLoc() { 164 return ErrLoc; 165 } 166 167 /// getErr - Get the current error string 168 const std::string &getErr() { 169 return Err; 170 } 171 172 /// getKind - Get the kind of current token. 173 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 174 175 /// is - Check if the current token has kind \p K. 176 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 177 178 /// isNot - Check if the current token has kind \p K. 179 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 180 181 /// setSkipSpace - Set whether spaces should be ignored by the lexer 182 void setSkipSpace(bool val) { SkipSpace = val; } 183}; 184 185} // End llvm namespace 186 187#endif 188