//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
10674be02d525d4e24bc6943ed9274958c580bcfbcJakub Staszak#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11674be02d525d4e24bc6943ed9274958c580bcfbcJakub Staszak#define LLVM_MC_MCPARSER_MCASMLEXER_H
12dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
1336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/ADT/APInt.h"
14cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar#include "llvm/ADT/StringRef.h"
151f7210e808373fa92be3a2d4fa653a6f79d5088bCraig Topper#include "llvm/Support/Compiler.h"
161f6efa3996dd1929fbc129203ce5009b620e6969Michael J. Spencer#include "llvm/Support/DataTypes.h"
1779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan#include "llvm/Support/SMLoc.h"
18cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
19dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarnamespace llvm {
20dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
2137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Target independent representation for an assembler token.
22f007e853e26845cd6866b52d646455fc69f4e0afChris Lattnerclass AsmToken {
23f007e853e26845cd6866b52d646455fc69f4e0afChris Lattnerpublic:
24cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  enum TokenKind {
25cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // Markers
26cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Eof, Error,
27cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
28cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // String values.
29cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Identifier,
30cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    String,
3198311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
32cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // Integer values.
33cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Integer,
3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    BigNum, // larger than 64 bits
3598311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
3654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    // Real values.
3754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    Real,
3898311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
39cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    // No-value.
40cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    EndOfStatement,
41cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Colon,
427b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    Space,
43cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Plus, Minus, Tilde,
44cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Slash,    // '/'
45653664471333f316020e96dd3d664f4984f66a65Rafael Espindola    BackSlash, // '\'
46fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
47d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner    Star, Dot, Comma, Dollar, Equal, EqualEqual,
4898311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
4998311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach    Pipe, PipePipe, Caret,
509823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
51cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    Less, LessEqual, LessLess, LessGreater,
52924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming    Greater, GreaterEqual, GreaterGreater, At
53cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  };
54cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
55fb22ede033f792196643bad0ceafe473366ddf41Craig Topperprivate:
56cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  TokenKind Kind;
57cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
58cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// A reference to the entire token contents; this is always a pointer into
59cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// a memory buffer owned by the source manager.
60cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  StringRef Str;
61cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  APInt IntVal;
63cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
64cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbarpublic:
65cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  AsmToken() {}
664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
674c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      : Kind(Kind), Str(Str), IntVal(IntVal) {}
684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
70cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
71cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  TokenKind getKind() const { return Kind; }
72cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool is(TokenKind K) const { return Kind == K; }
73cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool isNot(TokenKind K) const { return Kind != K; }
74cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
75cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  SMLoc getLoc() const;
765efabcf01d1c9cdf7ac59a17d757c6ad4cdb112cBenjamin Kramer  SMLoc getEndLoc() const;
7737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SMRange getLocRange() const;
78cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
7937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the contents of a string token (without quotes).
8098311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach  StringRef getStringContents() const {
8176c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    assert(Kind == String && "This token isn't a string!");
8276c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    return Str.slice(1, Str.size() - 1);
8376c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  }
8476c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar
8537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the identifier string for the current token, which should be an
8637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// identifier or a string. This gets the portion of the string which should
8737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// be used as the identifier, e.g., it does not include the quotes on
8837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// strings.
8976c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  StringRef getIdentifier() const {
9076c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    if (Kind == Identifier)
9176c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar      return getString();
9276c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    return getStringContents();
9376c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar  }
9476c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar
9537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the string for the current token, this includes all characters (for
9637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// example, the quotes on strings) in the token.
97cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  ///
98cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// The returned StringRef points into the source manager's memory buffer, and
99cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// is safe to store across calls to Lex().
100cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  StringRef getString() const { return Str; }
101cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
102cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  // FIXME: Don't compute this in advance, it makes every token larger, and is
103cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  // also not generally what we want (it is nicer for recovery etc. to lex 123br
104cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  // as a single token, then diagnose as an invalid number).
10598311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach  int64_t getIntVal() const {
10676c4d7696c1eb566d53467a76024c5fdadd448e4Daniel Dunbar    assert(Kind == Integer && "This token isn't an integer!");
10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return IntVal.getZExtValue();
10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  APInt getAPIntVal() const {
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    assert((Kind == Integer || Kind == BigNum) &&
11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines           "This token isn't an integer!");
11398311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach    return IntVal;
114cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  }
115cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar};
116cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
11737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Generic assembler lexer interface, for use by target specific assembly
11837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// lexers.
119dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarclass MCAsmLexer {
120cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// The current token, stored in the base class for faster access.
121cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  AsmToken CurTok;
12298311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
12379036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  /// The location and description of the current error
12479036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  SMLoc ErrLoc;
12579036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  std::string Err;
126cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
127ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  MCAsmLexer(const MCAsmLexer &) = delete;
128ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void operator=(const MCAsmLexer &) = delete;
129dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarprotected: // Can only create subclasses.
130ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar  const char *TokStart;
1317b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd  bool SkipSpace;
13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool AllowAtInIdentifier;
133ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar
134dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar  MCAsmLexer();
135cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
136cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  virtual AsmToken LexToken() = 0;
13798311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
13879036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  void SetError(const SMLoc &errLoc, const std::string &err) {
13979036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    ErrLoc = errLoc;
14079036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    Err = err;
14179036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  }
14298311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
143dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbarpublic:
144dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar  virtual ~MCAsmLexer();
145cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
14637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Consume the next token from the input stream and return it.
147cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  ///
148cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// The lexer will continuosly return the end-of-file token once the end of
149cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  /// the main input file has been reached.
150cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  const AsmToken &Lex() {
151cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    return CurTok = LexToken();
152cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  }
153cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
15419ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5bDaniel Dunbar  virtual StringRef LexUntilEndOfStatement() = 0;
15519ad3b88f71fdc0fe0ec19e05bb37c3ef1a42b5bDaniel Dunbar
15637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the current source location.
157ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar  SMLoc getLoc() const;
158ca90dc6d295f7f6a5ef4240f26bcebe54276def5Daniel Dunbar
15937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the current (last) lexed token.
16037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  const AsmToken &getTok() const {
161cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar    return CurTok;
162cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  }
16398311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
16437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Look ahead at the next token to be lexed.
16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the current error location
16879036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  const SMLoc &getErrLoc() {
16979036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    return ErrLoc;
17079036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  }
17198311ecb4ae9c82baba9e3a48acf756a81c8e9a4Jim Grosbach
17237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the current error string
17379036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  const std::string &getErr() {
17479036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan    return Err;
17579036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  }
176cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
17737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Get the kind of current token.
178cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
179cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
18037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Check if the current token has kind \p K.
181cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
182cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar
18337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Check if the current token has kind \p K.
184cbbe2484418536264b1a26c517c16d505a61d5c8Daniel Dunbar  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
1857b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd
18637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Set whether spaces should be ignored by the lexer
1877b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd  void setSkipSpace(bool val) { SkipSpace = val; }
18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
191dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar};
192dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
193dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar} // End llvm namespace
194dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar
195dbd692a66e6a5f60ec3ff120ed27ae3a918c375fDaniel Dunbar#endif
196