1dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar// 3dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar// The LLVM Compiler Infrastructure 4dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar// 5dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar// This file is distributed under the University of Illinois Open Source 6dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar// License. See LICENSE.TXT for details. 7dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar// 8dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar//===----------------------------------------------------------------------===// 9dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar 10674be02d525d4e24bc6943ed9274958c580bcfbcJakub Staszak#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11674be02d525d4e24bc6943ed9274958c580bcfbcJakub Staszak#define LLVM_MC_MCPARSER_MCASMLEXER_H 12dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar 1336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/ADT/APInt.h" 14cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar#include "llvm/ADT/StringRef.h" 151f7210e808373fa92be3a2d4fa653a6f79d5088bCraig Topper#include "llvm/Support/Compiler.h" 161f6efa3996dd1929fbc129203ce5009b620e6969Michael J. Spencer#include "llvm/Support/DataTypes.h" 1779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan#include "llvm/Support/SMLoc.h" 18cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 19dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarnamespace llvm { 20dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar 2137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Target independent representation for an assembler token. 22f007e853e26845cd6866b52d646455fc69f4e0afChris Lattnerclass AsmToken { 23f007e853e26845cd6866b52d646455fc69f4e0afChris Lattnerpublic: 24cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar enum TokenKind { 25cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // Markers 26cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Eof, Error, 27cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 28cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // String values. 29cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Identifier, 30cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar String, 3198311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 32cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // Integer values. 33cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Integer, 3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BigNum, // larger than 64 bits 3598311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 3654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Real values. 3754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar Real, 3898311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 39cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // No-value. 40cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar EndOfStatement, 41cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Colon, 427b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd Space, 43cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Plus, Minus, Tilde, 44cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Slash, // '/' 45653664471333f316020e96dd3d664f4984f66a65Rafael Espindola BackSlash, // '\' 46fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby LParen, RParen, LBrac, RBrac, LCurly, RCurly, 47d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner Star, Dot, Comma, Dollar, Equal, EqualEqual, 4898311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 4998311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach Pipe, PipePipe, Caret, 509823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 51cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar Less, LessEqual, LessLess, LessGreater, 52924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming Greater, GreaterEqual, GreaterGreater, At 53cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar }; 54cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 55fb22ede033f792196643bad0ceafe473366ddf41Craig Topperprivate: 56cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar TokenKind Kind; 57cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 58cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// A reference to the entire token contents; this is always a pointer into 59cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// a memory buffer owned by the source manager. 60cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar StringRef Str; 61cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines APInt IntVal; 63cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 64cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbarpublic: 65cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar AsmToken() {} 664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar AsmToken(TokenKind Kind, StringRef Str, APInt IntVal) 674c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar : Kind(Kind), Str(Str), IntVal(IntVal) {} 684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0) 694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {} 70cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 71cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar TokenKind getKind() const { return Kind; } 72cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar bool is(TokenKind K) const { return Kind == K; } 73cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar bool isNot(TokenKind K) const { return Kind != K; } 74cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 75cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar SMLoc getLoc() const; 765efabcf01d1c9cdf7ac59a17d757c6ad4cdb112cBenjamin Kramer SMLoc getEndLoc() const; 7737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SMRange getLocRange() const; 78cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 7937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the contents of a string token (without quotes). 8098311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach StringRef getStringContents() const { 8176c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar assert(Kind == String && "This token isn't a string!"); 8276c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar return Str.slice(1, Str.size() - 1); 8376c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar } 8476c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar 8537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the identifier string for the current token, which should be an 8637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// identifier or a string. This gets the portion of the string which should 8737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// be used as the identifier, e.g., it does not include the quotes on 8837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// strings. 8976c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar StringRef getIdentifier() const { 9076c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar if (Kind == Identifier) 9176c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar return getString(); 9276c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar return getStringContents(); 9376c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar } 9476c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar 9537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the string for the current token, this includes all characters (for 9637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// example, the quotes on strings) in the token. 97cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// 98cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// The returned StringRef points into the source manager's memory buffer, and 99cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// is safe to store across calls to Lex(). 100cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar StringRef getString() const { return Str; } 101cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 102cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // FIXME: Don't compute this in advance, it makes every token larger, and is 103cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // also not generally what we want (it is nicer for recovery etc. to lex 123br 104cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar // as a single token, then diagnose as an invalid number). 10598311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach int64_t getIntVal() const { 10676c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar assert(Kind == Integer && "This token isn't an integer!"); 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return IntVal.getZExtValue(); 10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines APInt getAPIntVal() const { 11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert((Kind == Integer || Kind == BigNum) && 11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "This token isn't an integer!"); 11398311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach return IntVal; 114cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar } 115cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar}; 116cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 11737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Generic assembler lexer interface, for use by target specific assembly 11837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// lexers. 119dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarclass MCAsmLexer { 120cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// The current token, stored in the base class for faster access. 121cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar AsmToken CurTok; 12298311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 12379036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan /// The location and description of the current error 12479036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan SMLoc ErrLoc; 12579036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan std::string Err; 126cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 127ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MCAsmLexer(const MCAsmLexer &) = delete; 128ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void operator=(const MCAsmLexer &) = delete; 129dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarprotected: // Can only create subclasses. 130ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar const char *TokStart; 1317b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd bool SkipSpace; 13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool AllowAtInIdentifier; 133ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar 134dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar MCAsmLexer(); 135cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 136cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar virtual AsmToken LexToken() = 0; 13798311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 13879036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan void SetError(const SMLoc &errLoc, const std::string &err) { 13979036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan ErrLoc = errLoc; 14079036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan Err = err; 14179036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan } 14298311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 143dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarpublic: 144dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar virtual ~MCAsmLexer(); 145cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 14637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Consume the next token from the input stream and return it. 147cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// 148cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// The lexer will continuosly return the end-of-file token once the end of 149cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar /// the main input file has been reached. 150cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar const AsmToken &Lex() { 151cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar return CurTok = LexToken(); 152cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar } 153cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 15419ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5bDaniel Dunbar virtual StringRef LexUntilEndOfStatement() = 0; 15519ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5bDaniel Dunbar 15637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the current source location. 157ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar SMLoc getLoc() const; 158ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar 15937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the current (last) lexed token. 16037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const AsmToken &getTok() const { 161cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar return CurTok; 162cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar } 16398311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 16437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Look ahead at the next token to be lexed. 16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0; 16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the current error location 16879036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan const SMLoc &getErrLoc() { 16979036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan return ErrLoc; 17079036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan } 17198311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach 17237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the current error string 17379036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan const std::string &getErr() { 17479036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan return Err; 17579036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan } 176cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 17737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Get the kind of current token. 178cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 179cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 18037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Check if the current token has kind \p K. 181cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 182cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar 18337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Check if the current token has kind \p K. 184cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 1857b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd 18637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Set whether spaces should be ignored by the lexer 1877b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd void setSkipSpace(bool val) { SkipSpace = val; } 18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } 19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 191dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar}; 192dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar 193dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar} // End llvm namespace 194dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar 195dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar#endif 196