MCAsmLexer.h revision 7b6f2034ac355bd3b3cc88960bf8d0e694fe3db4
//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
10dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar#ifndef LLVM_MC_MCASMLEXER_H
11dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar#define LLVM_MC_MCASMLEXER_H
12dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <string>
17cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
18dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarnamespace llvm {
19dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
20cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar/// AsmToken - Target independent representation for an assembler token.
21f007e853e26845cd6866b52d646455fc69f4e0afChris Lattnerclass AsmToken {
22f007e853e26845cd6866b52d646455fc69f4e0afChris Lattnerpublic:
23cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  enum TokenKind {
24cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // Markers
25cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Eof, Error,
26cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
27cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // String values.
28cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Identifier,
29cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    String,
3098311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
31cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // Integer values.
32cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Integer,
3398311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
3454f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    // Real values.
3554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    Real,
3698311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
3794b9550a32d189704a8eae55505edf62662c0534Evan Cheng    // Register values (stored in IntVal).  Only used by MCTargetAsmLexer.
385d74e1f64445713cca863af03b8b6ab39321046eSean Callanan    Register,
3998311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
40cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // No-value.
41cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    EndOfStatement,
42cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Colon,
437b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    Space,
44cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Plus, Minus, Tilde,
45cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Slash,    // '/'
46653664471333f316020e96dd3d664f4984f66a65Rafael Espindola    BackSlash, // '\'
47fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
48d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner    Star, Dot, Comma, Dollar, Equal, EqualEqual,
4998311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
5098311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach    Pipe, PipePipe, Caret,
519823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
52cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Less, LessEqual, LessLess, LessGreater,
53924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming    Greater, GreaterEqual, GreaterGreater, At
54cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  };
55cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
56fb22ede033f792196643bad0ceafe473366ddf41Craig Topperprivate:
57cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  TokenKind Kind;
58cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
59cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// A reference to the entire token contents; this is always a pointer into
60cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// a memory buffer owned by the source manager.
61cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  StringRef Str;
62cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
63cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  int64_t IntVal;
64cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
65cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbarpublic:
66cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  AsmToken() {}
672928c83b010f7cfdb0f819199d806f6942a7d995Daniel Dunbar  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
68cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
69cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
70cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  TokenKind getKind() const { return Kind; }
71cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool is(TokenKind K) const { return Kind == K; }
72cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool isNot(TokenKind K) const { return Kind != K; }
73cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
74cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  SMLoc getLoc() const;
755efabcf01d1c9cdf7ac59a17d757c6ad4cdb112cBenjamin Kramer  SMLoc getEndLoc() const;
76cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
7776c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  /// getStringContents - Get the contents of a string token (without quotes).
7898311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach  StringRef getStringContents() const {
7976c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    assert(Kind == String && "This token isn't a string!");
8076c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    return Str.slice(1, Str.size() - 1);
8176c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  }
8276c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar
8376c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  /// getIdentifier - Get the identifier string for the current token, which
8476c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  /// should be an identifier or a string. This gets the portion of the string
8576c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  /// which should be used as the identifier, e.g., it does not include the
8676c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  /// quotes on strings.
8776c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  StringRef getIdentifier() const {
8876c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    if (Kind == Identifier)
8976c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar      return getString();
9076c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    return getStringContents();
9176c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  }
9276c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar
93cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// getString - Get the string for the current token, this includes all
94cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// characters (for example, the quotes on strings) in the token.
95cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  ///
96cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// The returned StringRef points into the source manager's memory buffer, and
97cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// is safe to store across calls to Lex().
98cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  StringRef getString() const { return Str; }
99cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
100cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  // FIXME: Don't compute this in advance, it makes every token larger, and is
101cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  // also not generally what we want (it is nicer for recovery etc. to lex 123br
102cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  // as a single token, then diagnose as an invalid number).
10398311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach  int64_t getIntVal() const {
10476c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    assert(Kind == Integer && "This token isn't an integer!");
10598311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach    return IntVal;
106cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  }
10798311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
1085d74e1f64445713cca863af03b8b6ab39321046eSean Callanan  /// getRegVal - Get the register number for the current token, which should
1095d74e1f64445713cca863af03b8b6ab39321046eSean Callanan  /// be a register.
1105d74e1f64445713cca863af03b8b6ab39321046eSean Callanan  unsigned getRegVal() const {
1115d74e1f64445713cca863af03b8b6ab39321046eSean Callanan    assert(Kind == Register && "This token isn't a register!");
1125d74e1f64445713cca863af03b8b6ab39321046eSean Callanan    return static_cast<unsigned>(IntVal);
1135d74e1f64445713cca863af03b8b6ab39321046eSean Callanan  }
114cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar};
115cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
116dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
117dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar/// assembly lexers.
118dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarclass MCAsmLexer {
119cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// The current token, stored in the base class for faster access.
120cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  AsmToken CurTok;
12198311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
12279036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  /// The location and description of the current error
12379036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  SMLoc ErrLoc;
12479036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  std::string Err;
125cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
1261f7210e808373fa92be3a2d4fa653a6f79d5088bCraig Topper  MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
1271f7210e808373fa92be3a2d4fa653a6f79d5088bCraig Topper  void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
128dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarprotected: // Can only create subclasses.
129ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar  const char *TokStart;
1307b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd  bool SkipSpace;
131ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar
132dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar  MCAsmLexer();
133cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
134cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  virtual AsmToken LexToken() = 0;
13598311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
13679036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  void SetError(const SMLoc &errLoc, const std::string &err) {
13779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    ErrLoc = errLoc;
13879036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    Err = err;
13979036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  }
14098311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
141dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarpublic:
142dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar  virtual ~MCAsmLexer();
143cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
144cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// Lex - Consume the next token from the input stream and return it.
145cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  ///
146cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// The lexer will continuosly return the end-of-file token once the end of
147cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// the main input file has been reached.
148cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  const AsmToken &Lex() {
149cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    return CurTok = LexToken();
150cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  }
151cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
15219ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5bDaniel Dunbar  virtual StringRef LexUntilEndOfStatement() = 0;
15319ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5bDaniel Dunbar
154ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar  /// getLoc - Get the current source location.
155ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar  SMLoc getLoc() const;
156ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar
157cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// getTok - Get the current (last) lexed token.
158cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  const AsmToken &getTok() {
159cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    return CurTok;
160cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  }
16198311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
16279036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  /// getErrLoc - Get the current error location
16379036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  const SMLoc &getErrLoc() {
16479036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    return ErrLoc;
16579036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  }
16698311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
16779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  /// getErr - Get the current error string
16879036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  const std::string &getErr() {
16979036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    return Err;
17079036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  }
171cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
172cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// getKind - Get the kind of current token.
173cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
174cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
175c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko  /// is - Check if the current token has kind \p K.
176cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
177cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
178c5252da873d547a19069eaf9030fec203f128f66Dmitri Gribenko  /// isNot - Check if the current token has kind \p K.
179cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
1807b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd
1817b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd  /// setSkipSpace - Set whether spaces should be ignored by the lexer
1827b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd  void setSkipSpace(bool val) { SkipSpace = val; }
183dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar};
184dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
185dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar} // End llvm namespace
186dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
187dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar#endif
188