MCAsmLexer.h revision d305035155ef3d138e102434bf5a733ea2e32405
1//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef LLVM_MC_MCASMLEXER_H 11#define LLVM_MC_MCASMLEXER_H 12 13#include "llvm/ADT/StringRef.h" 14#include "llvm/System/DataTypes.h" 15#include "llvm/Support/SMLoc.h" 16 17namespace llvm { 18class MCAsmLexer; 19class MCInst; 20class Target; 21 22/// AsmToken - Target independent representation for an assembler token. 23class AsmToken { 24public: 25 enum TokenKind { 26 // Markers 27 Eof, Error, 28 29 // String values. 30 Identifier, 31 String, 32 33 // Integer values. 34 Integer, 35 36 // Register values (stored in IntVal). Only used by TargetAsmLexer. 37 Register, 38 39 // No-value. 40 EndOfStatement, 41 Colon, 42 Plus, Minus, Tilde, 43 Slash, // '/' 44 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 45 Star, Dot, Comma, Dollar, Equal, EqualEqual, 46 47 Pipe, PipePipe, Caret, 48 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 49 Less, LessEqual, LessLess, LessGreater, 50 Greater, GreaterEqual, GreaterGreater 51 }; 52 53 TokenKind Kind; 54 55 /// A reference to the entire token contents; this is always a pointer into 56 /// a memory buffer owned by the source manager. 57 StringRef Str; 58 59 int64_t IntVal; 60 61public: 62 AsmToken() {} 63 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 64 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 65 66 TokenKind getKind() const { return Kind; } 67 bool is(TokenKind K) const { return Kind == K; } 68 bool isNot(TokenKind K) const { return Kind != K; } 69 70 SMLoc getLoc() const; 71 72 /// getStringContents - Get the contents of a string token (without quotes). 73 StringRef getStringContents() const { 74 assert(Kind == String && "This token isn't a string!"); 75 return Str.slice(1, Str.size() - 1); 76 } 77 78 /// getIdentifier - Get the identifier string for the current token, which 79 /// should be an identifier or a string. This gets the portion of the string 80 /// which should be used as the identifier, e.g., it does not include the 81 /// quotes on strings. 82 StringRef getIdentifier() const { 83 if (Kind == Identifier) 84 return getString(); 85 return getStringContents(); 86 } 87 88 /// getString - Get the string for the current token, this includes all 89 /// characters (for example, the quotes on strings) in the token. 90 /// 91 /// The returned StringRef points into the source manager's memory buffer, and 92 /// is safe to store across calls to Lex(). 93 StringRef getString() const { return Str; } 94 95 // FIXME: Don't compute this in advance, it makes every token larger, and is 96 // also not generally what we want (it is nicer for recovery etc. to lex 123br 97 // as a single token, then diagnose as an invalid number). 98 int64_t getIntVal() const { 99 assert(Kind == Integer && "This token isn't an integer!"); 100 return IntVal; 101 } 102 103 /// getRegVal - Get the register number for the current token, which should 104 /// be a register. 105 unsigned getRegVal() const { 106 assert(Kind == Register && "This token isn't a register!"); 107 return static_cast<unsigned>(IntVal); 108 } 109}; 110 111/// MCAsmLexer - Generic assembler lexer interface, for use by target specific 112/// assembly lexers. 113class MCAsmLexer { 114 /// The current token, stored in the base class for faster access. 115 AsmToken CurTok; 116 117 /// The location and description of the current error 118 SMLoc ErrLoc; 119 std::string Err; 120 121 MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT 122 void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT 123protected: // Can only create subclasses. 124 MCAsmLexer(); 125 126 virtual AsmToken LexToken() = 0; 127 128 void SetError(const SMLoc &errLoc, const std::string &err) { 129 ErrLoc = errLoc; 130 Err = err; 131 } 132 133public: 134 virtual ~MCAsmLexer(); 135 136 /// Lex - Consume the next token from the input stream and return it. 137 /// 138 /// The lexer will continuosly return the end-of-file token once the end of 139 /// the main input file has been reached. 140 const AsmToken &Lex() { 141 return CurTok = LexToken(); 142 } 143 144 /// getTok - Get the current (last) lexed token. 145 const AsmToken &getTok() { 146 return CurTok; 147 } 148 149 /// getErrLoc - Get the current error location 150 const SMLoc &getErrLoc() { 151 return ErrLoc; 152 } 153 154 /// getErr - Get the current error string 155 const std::string &getErr() { 156 return Err; 157 } 158 159 /// getKind - Get the kind of current token. 160 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 161 162 /// is - Check if the current token has kind \arg K. 163 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 164 165 /// isNot - Check if the current token has kind \arg K. 166 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 167}; 168 169} // End llvm namespace 170 171#endif 172