//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
13a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
14be343b3ca3f53d5d5e29f3591af8b9bb831daa98Chris Lattner#include "llvm/MC/MCParser/AsmLexer.h"
159823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby#include "llvm/MC/MCAsmInfo.h"
16d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/MemoryBuffer.h"
17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/SMLoc.h"
18476b242fe7a61e5f9ac6214b0bc5c680d24f152eNick Lewycky#include <cctype>
194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner#include <cerrno>
2066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands#include <cstdio>
214506bd2cfd3e75535670890031eec26e216993b8Chris Lattner#include <cstdlib>
22a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerusing namespace llvm;
23a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
24fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean CallananAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI)  {
25dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  CurPtr = nullptr;
266148225b9590f18fcb6a1d3151d3158b316965e0Jim Grosbach  isAtStartOfLine = true;
2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
28faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner}
29faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner
30faf32c102db556e367af1e5bca7359160840d2d0Chris LattnerAsmLexer::~AsmLexer() {
31a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner}
32a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
33cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesvoid AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
34cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  CurBuf = Buf;
35de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
36fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  if (ptr)
37fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan    CurPtr = ptr;
38fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  else
39cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    CurPtr = CurBuf.begin();
40de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
41dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  TokStart = nullptr;
42fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan}
43fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan
444651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// ReturnError - Set the error to the specified string at the specified
453f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar/// location.  This is defined to always return AsmToken::Error.
46cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
4779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  SetError(SMLoc::getFromPointer(Loc), Msg);
48de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::Error, StringRef(Loc, 0));
504651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
52a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerint AsmLexer::getNextChar() {
53a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  char CurChar = *CurPtr++;
54a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  switch (CurChar) {
55a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  default:
56a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    return (unsigned char)CurChar;
57fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  case 0:
58a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // A nul character in the stream is either the end of the current buffer or
59a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // a random nul in the file.  Disambiguate that here.
60cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (CurPtr - 1 != CurBuf.end())
61a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner      return 0;  // Just whitespace.
62de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
63a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // Otherwise, return end of file.
64de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    --CurPtr;  // Another call to lex will return EOF again.
65a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    return EOF;
66a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  }
67a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner}
68a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
694f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
704f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar///
714f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// The leading integral digit sequence and dot should have already been
724f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed, some or all of the fractional digit sequence *can* have been
734f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed.
744f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel DunbarAsmToken AsmLexer::LexFloatLiteral() {
754f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // Skip the fractional digit sequence.
764f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  while (isdigit(*CurPtr))
774f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    ++CurPtr;
784f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar
794f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // Check for exponent; we intentionally accept a slighlty wider set of
804f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // literals here and rely on the upstream client to reject invalid ones (e.g.,
814f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // "1e+").
824f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  if (*CurPtr == 'e' || *CurPtr == 'E') {
834f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    ++CurPtr;
844f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    if (*CurPtr == '-' || *CurPtr == '+')
854f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      ++CurPtr;
864f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    while (isdigit(*CurPtr))
874f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      ++CurPtr;
884f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  }
894f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar
904f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  return AsmToken(AsmToken::Real,
914f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar                  StringRef(TokStart, CurPtr - TokStart));
924f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar}
934f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar
94337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
95337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// while making sure there are enough actual digits around for the constant to
96337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// be valid.
97337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover///
98337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
99337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// before we get here.
100337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim NorthoverAsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
101337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
102337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover         "unexpected parse state in floating hex");
103337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  bool NoFracDigits = true;
104337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
105337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  // Skip the fractional part if there is one
106337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  if (*CurPtr == '.') {
107337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    ++CurPtr;
108337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
109337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    const char *FracStart = CurPtr;
110337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    while (isxdigit(*CurPtr))
111337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover      ++CurPtr;
112337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
113337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    NoFracDigits = CurPtr == FracStart;
114337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  }
115337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
116337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  if (NoIntDigits && NoFracDigits)
117337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
118337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover                                 "expected at least one significand digit");
119337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
120337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  // Make sure we do have some kind of proper exponent part
121337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  if (*CurPtr != 'p' && *CurPtr != 'P')
122337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
123337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover                                 "expected exponent part 'p'");
124337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  ++CurPtr;
125337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
126337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  if (*CurPtr == '+' || *CurPtr == '-')
127337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    ++CurPtr;
128337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
129337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  // N.b. exponent digits are *not* hex
130337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  const char *ExpStart = CurPtr;
131337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  while (isdigit(*CurPtr))
132337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    ++CurPtr;
133337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
134337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  if (CurPtr == ExpStart)
135337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
136337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover                                 "expected at least one exponent digit");
137337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
138337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
139337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover}
140337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
/// Return true if \p c may appear inside an identifier, i.e. it matches
/// [a-zA-Z0-9_$.?], or it is '@' and \p AllowAt is set.
///
/// \param c       the source byte to classify.
/// \param AllowAt whether '@' counts as an identifier character.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum() has undefined behaviour for negative arguments other than EOF;
  // plain char may be signed, so convert to unsigned char before classifying.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || (c == '@' && AllowAt) || c == '?';
}
146cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexIdentifier() {
14754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  // Check for floating point literals.
14854f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
1494f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    // Disambiguate a .1243foo identifier from a floating literal.
15054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    while (isdigit(*CurPtr))
15154f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar      ++CurPtr;
15236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (*CurPtr == 'e' || *CurPtr == 'E' ||
15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
1544f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      return LexFloatLiteral();
15554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  }
15654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar
15736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
1584651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    ++CurPtr;
159de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
160d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner  // Handle . as a special case.
1615fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar  if (CurPtr == TokStart+1 && TokStart[0] == '.')
1625fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar    return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
163de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
1643f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
1654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
1664651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
1674651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexSlash: Slash: /
1684651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///           C-Style Comment: /* ... */
169cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexSlash() {
170383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  switch (*CurPtr) {
171383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  case '*': break; // C style comment.
172383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  case '/': return ++CurPtr, LexLineComment();
173bdf90d679befafe70b93082042266ba58a9ad0b2Daniel Dunbar  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
174383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  }
1754651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
1764651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // C Style comment.
1774651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  ++CurPtr;  // skip the star.
1784651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  while (1) {
1794651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    int CurChar = getNextChar();
1804651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    switch (CurChar) {
1814651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    case EOF:
18227aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner      return ReturnError(TokStart, "unterminated comment");
1834651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    case '*':
1844651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      // End of the comment?
1854651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      if (CurPtr[0] != '/') break;
186de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
1874651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      ++CurPtr;   // End the */.
1884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      return LexToken();
1894651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    }
1904651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
1914651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
1924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
193383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// LexLineComment: Comment: #[^\n]*
194383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar///                        : //[^\n]*
195cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexLineComment() {
196cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar  // FIXME: This is broken if we happen to a comment at the end of a file, which
197cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar  // was .included, and which doesn't end with a newline.
1984651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  int CurChar = getNextChar();
1990ecd825e54f2235c133b44c967a612551633106cChris Lattner  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
2004651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    CurChar = getNextChar();
201de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2024651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  if (CurChar == EOF)
203cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
204cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
2054651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
2064651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
/// Advance \p CurPtr past an integer type suffix: an optional 'U' followed by
/// at most two 'L' characters (i.e. ULL, UL, U, L or LL). Only upper-case
/// letters are recognised; anything else leaves \p CurPtr where it is.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U')
    ++CurPtr;

  // Up to two trailing 'L's.
  for (int Remaining = 2; Remaining != 0 && *CurPtr == 'L'; --Remaining)
    ++CurPtr;
}
216a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner
/// Look ahead to search for first non-hex digit, if it's [hH], then we treat
/// the integer as a hexadecimal, possibly with leading zeroes.
///
/// On return \p CurPtr has been moved:
///  - to the 'h'/'H' suffix when one terminates the run of hex digits;
///  - otherwise to the first non-decimal hex digit (a-f, A-F) if the run
///    contained one, so that only the leading decimal digits are covered;
///  - otherwise to the first character after the run of digits.
///
/// \param CurPtr       in/out scan position.
/// \param DefaultRadix radix to report when no [hH] suffix is present.
/// \returns 16 if the digits are followed by [hH], else \p DefaultRadix.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // <cctype> classifiers are undefined for negative arguments other than
    // EOF; plain char may be signed, so classify the unsigned value.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // Remember where the digits stop being valid decimal.
    if (!FirstHex)
      FirstHex = LookAhead;
  }

  const bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
  if (isHex)
    return 16;
  return DefaultRadix;
}
239d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
24036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic AsmToken intToken(StringRef Ref, APInt &Value)
24136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines{
24236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Value.isIntN(64))
24336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return AsmToken(AsmToken::Integer, Ref, Value);
24436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return AsmToken(AsmToken::BigNum, Ref, Value);
24536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
24636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
2474651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexDigit: First character is [0-9].
2484651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///   Local Label: [0-9][:]
249e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola///   Forward/Backward Label: [0-9][fb]
250e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola///   Binary integer: 0b[01]+
2514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///   Octal integer: 0[0-7]+
252d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
2534651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///   Decimal integer: [1-9][0-9]*
254cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexDigit() {
2554651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // Decimal integer: [1-9][0-9]*
256facb34b41cea284b5a0b4992ff619e5cfd5e6a22Daniel Dunbar  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
257d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    unsigned Radix = doLookAhead(CurPtr, 10);
258e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    bool isHex = Radix == 16;
25954f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    // Check for floating point literals.
260e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
26154f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar      ++CurPtr;
2624f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      return LexFloatLiteral();
26354f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    }
26454f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar
265d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    StringRef Result(TokStart, CurPtr - TokStart);
266a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner
26736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    APInt Value(128, 0, true);
26836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (Result.getAsInteger(Radix, Value))
26936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return ReturnError(TokStart, !isHex ? "invalid decimal number" :
270d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier                           "invalid hexdecimal number");
271de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
272d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    // Consume the [bB][hH].
273d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    if (Radix == 2 || Radix == 16)
274d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier      ++CurPtr;
275d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
276ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach    // The darwin/x86 (and x86-64) assembler accepts and ignores type
277ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach    // suffices on integer literals.
278a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    SkipIgnoredIntegerSuffix(CurPtr);
279de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return intToken(Result, Value);
2814651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
282de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2834651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  if (*CurPtr == 'b') {
284e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    ++CurPtr;
285e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    // See if we actually have "0b" as part of something like "jmp 0b\n"
286e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    if (!isdigit(CurPtr[0])) {
287e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola      --CurPtr;
288e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola      StringRef Result(TokStart, CurPtr - TokStart);
289e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola      return AsmToken(AsmToken::Integer, Result, 0);
290e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    }
291e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    const char *NumStart = CurPtr;
2924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    while (CurPtr[0] == '0' || CurPtr[0] == '1')
2934651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      ++CurPtr;
294de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2954651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    // Requires at least one binary digit.
2964651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    if (CurPtr == NumStart)
29705f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(TokStart, "invalid binary number");
298de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
299d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    StringRef Result(TokStart, CurPtr - TokStart);
300de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    APInt Value(128, 0, true);
302a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    if (Result.substr(2).getAsInteger(2, Value))
30305f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(TokStart, "invalid binary number");
304de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
305a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
306a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // suffixes on integer literals.
307a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    SkipIgnoredIntegerSuffix(CurPtr);
308de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
30936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return intToken(Result, Value);
3104651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
311de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
3124651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  if (*CurPtr == 'x') {
3134651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    ++CurPtr;
3144651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    const char *NumStart = CurPtr;
3154651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    while (isxdigit(CurPtr[0]))
3164651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      ++CurPtr;
317de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
318337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
319337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    // diagnosed by LexHexFloatLiteral).
320337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
321337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover      return LexHexFloatLiteral(NumStart == CurPtr);
322337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover
323337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover    // Otherwise requires at least one hex digit.
3244651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    if (CurPtr == NumStart)
32505f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(CurPtr-2, "invalid hexadecimal number");
32603949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner
32736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    APInt Result(128, 0);
32803949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner    if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
32905f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(TokStart, "invalid hexadecimal number");
330de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
331d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    // Consume the optional [hH].
332d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    if (*CurPtr == 'h' || *CurPtr == 'H')
333d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier      ++CurPtr;
334d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
335a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
336a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // suffixes on integer literals.
337a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    SkipIgnoredIntegerSuffix(CurPtr);
338de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
3404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
341de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
34250e75bfc29269def44981ab5f109334d95f55007Matt Beaumont-Gay  // Either octal or hexadecimal.
34336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  APInt Value(128, 0, true);
344d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier  unsigned Radix = doLookAhead(CurPtr, 8);
345e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola  bool isHex = Radix == 16;
346d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier  StringRef Result(TokStart, CurPtr - TokStart);
347d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier  if (Result.getAsInteger(Radix, Value))
348e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    return ReturnError(TokStart, !isHex ? "invalid octal number" :
34953e5bb70db34b736eed01c1580af1afd7314a2d8Chad Rosier                       "invalid hexdecimal number");
350de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
351e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola  // Consume the [hH].
352e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola  if (Radix == 16)
353d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    ++CurPtr;
354d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
355a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
356a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner  // suffixes on integer literals.
357a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner  SkipIgnoredIntegerSuffix(CurPtr);
358de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
35936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return intToken(Result, Value);
3604651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
3614651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
3627529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky/// LexSingleQuote: Integer: 'b'
3637529b164104f802b59fbf96f5a88d9709ac3ff1aRoman DivackyAsmToken AsmLexer::LexSingleQuote() {
3647529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  int CurChar = getNextChar();
3657529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3667529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (CurChar == '\\')
3677529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    CurChar = getNextChar();
3687529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3697529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (CurChar == EOF)
3707529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    return ReturnError(TokStart, "unterminated single quote");
3717529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3727529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  CurChar = getNextChar();
3737529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3747529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (CurChar != '\'')
3757529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    return ReturnError(TokStart, "single quote way too long");
3767529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3777529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  // The idea here being that 'c' is basically just an integral
3787529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  // constant.
3797529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
3807529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  long long Value;
3817529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3827529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (Res.startswith("\'\\")) {
3837529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    char theChar = Res[2];
3847529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    switch (theChar) {
3857529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      default: Value = theChar; break;
3867529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case '\'': Value = '\''; break;
3877529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case 't': Value = '\t'; break;
3887529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case 'n': Value = '\n'; break;
3897529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case 'b': Value = '\b'; break;
3907529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    }
3917529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  } else
3927529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    Value = TokStart[1];
3937529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
394de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  return AsmToken(AsmToken::Integer, Res, Value);
3957529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky}
3967529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3977529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
39810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner/// LexQuote: String: "..."
399cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexQuote() {
40010a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  int CurChar = getNextChar();
40110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  // TODO: does gas allow multiline string constants?
40210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  while (CurChar != '"') {
40310a907d70fb54c40eecabb889e81c79b44092221Chris Lattner    if (CurChar == '\\') {
40410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner      // Allow \", etc.
40510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner      CurChar = getNextChar();
40610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner    }
407de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
40814ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner    if (CurChar == EOF)
40914ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner      return ReturnError(TokStart, "unterminated string constant");
41010a907d70fb54c40eecabb889e81c79b44092221Chris Lattner
41110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner    CurChar = getNextChar();
41210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  }
413de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
4143f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
41510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner}
41610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner
417ff4bc460c52c1f285d8a56da173641bf92d49e3fChris LattnerStringRef AsmLexer::LexUntilEndOfStatement() {
418ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner  TokStart = CurPtr;
419ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner
420d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  while (!isAtStartOfComment(*CurPtr) &&    // Start of line comment.
421d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach         !isAtStatementSeparator(CurPtr) && // End of statement marker.
422cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         *CurPtr != '\n' && *CurPtr != '\r' &&
423cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         (*CurPtr != 0 || CurPtr != CurBuf.end())) {
424ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner    ++CurPtr;
4259823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby  }
426ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner  return StringRef(TokStart, CurPtr-TokStart);
427ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner}
4284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
429f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin EnderbyStringRef AsmLexer::LexUntilEndOfLine() {
430f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  TokStart = CurPtr;
431f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby
432cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  while (*CurPtr != '\n' && *CurPtr != '\r' &&
433cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         (*CurPtr != 0 || CurPtr != CurBuf.end())) {
434f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    ++CurPtr;
435f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  }
436f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  return StringRef(TokStart, CurPtr-TokStart);
437f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby}
438f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby
43936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesconst AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) {
44036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const char *SavedTokStart = TokStart;
44136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const char *SavedCurPtr = CurPtr;
44236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool SavedAtStartOfLine = isAtStartOfLine;
44336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool SavedSkipSpace = SkipSpace;
44436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
44536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  std::string SavedErr = getErr();
44636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SMLoc SavedErrLoc = getErrLoc();
44736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
44836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SkipSpace = ShouldSkipSpace;
44936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  AsmToken Token = LexToken();
45036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
45136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SetError(SavedErrLoc, SavedErr);
45236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
45336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SkipSpace = SavedSkipSpace;
45436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  isAtStartOfLine = SavedAtStartOfLine;
45536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  CurPtr = SavedCurPtr;
45636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  TokStart = SavedTokStart;
45736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
45836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return Token;
45936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
46036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
461b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderbybool AsmLexer::isAtStartOfComment(char Char) {
462cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner  // FIXME: This won't work for multi-character comment indicators like "//".
463cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner  return Char == *MAI.getCommentString();
464b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby}
465b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby
466d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbachbool AsmLexer::isAtStatementSeparator(const char *Ptr) {
467d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  return strncmp(Ptr, MAI.getSeparatorString(),
468d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach                 strlen(MAI.getSeparatorString())) == 0;
469d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach}
470d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach
471cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexToken() {
472a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  TokStart = CurPtr;
473a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  // This always consumes at least one character.
474a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  int CurChar = getNextChar();
475de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
476f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  if (isAtStartOfComment(CurChar)) {
477f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    // If this comment starts with a '#', then return the Hash token and let
478f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    // the assembler parser see if it can be parsed as a cpp line filename
479f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    // comment. We do this only if we are at the start of a line.
480f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    if (CurChar == '#' && isAtStartOfLine)
481f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby      return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
482f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    isAtStartOfLine = true;
483b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby    return LexLineComment();
484f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  }
485d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  if (isAtStatementSeparator(TokStart)) {
486d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach    CurPtr += strlen(MAI.getSeparatorString()) - 1;
487d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach    return AsmToken(AsmToken::EndOfStatement,
488d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach                    StringRef(TokStart, strlen(MAI.getSeparatorString())));
489d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  }
4909823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby
49170796ca867132fd8c767301061afb9760cd69167Jim Grosbach  // If we're missing a newline at EOF, make sure we still get an
49270796ca867132fd8c767301061afb9760cd69167Jim Grosbach  // EndOfStatement token before the Eof token.
49370796ca867132fd8c767301061afb9760cd69167Jim Grosbach  if (CurChar == EOF && !isAtStartOfLine) {
49470796ca867132fd8c767301061afb9760cd69167Jim Grosbach    isAtStartOfLine = true;
49570796ca867132fd8c767301061afb9760cd69167Jim Grosbach    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
49670796ca867132fd8c767301061afb9760cd69167Jim Grosbach  }
49770796ca867132fd8c767301061afb9760cd69167Jim Grosbach
49870796ca867132fd8c767301061afb9760cd69167Jim Grosbach  isAtStartOfLine = false;
499a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  switch (CurChar) {
500a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  default:
5015fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
5025fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
5034651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      return LexIdentifier();
504de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
505a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // Unknown character, emit an error.
50627aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner    return ReturnError(TokStart, "invalid character in input");
5073f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
508a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  case 0:
509a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  case ' ':
510a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  case '\t':
5117b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    if (SkipSpace) {
5127b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      // Ignore whitespace.
5137b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      return LexToken();
5147b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    } else {
5157b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      int len = 1;
5167b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      while (*CurPtr==' ' || *CurPtr=='\t') {
5177b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd        CurPtr++;
5187b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd        len++;
5197b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      }
5207b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      return AsmToken(AsmToken::Space, StringRef(TokStart, len));
5217b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    }
5224651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '\n': // FALL THROUGH.
523d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  case '\r':
524f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    isAtStartOfLine = true;
525d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
5263f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
5273f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
5283f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
5293f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
5303f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
5313f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
532fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
533fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
534fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
535fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
5363f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
5373f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
5385fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
539924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
540653664471333f316020e96dd3d664f4984f66a65Rafael Espindola  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
541de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '=':
542475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '=')
5433f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
5443f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
545de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '|':
546475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '|')
5473f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
5483f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
5493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
550de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '&':
551475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '&')
5523f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
5533f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
554de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '!':
555475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '=')
5563f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
5573f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
5587b4608dfa018455021050ccd31d3c49aaecf7ff6Kevin Enderby  case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
5594651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '/': return LexSlash();
5609823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
5617529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  case '\'': return LexSingleQuote();
56210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  case '"': return LexQuote();
5634651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '0': case '1': case '2': case '3': case '4':
5644651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '5': case '6': case '7': case '8': case '9':
5654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    return LexDigit();
5668dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner  case '<':
567475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    switch (*CurPtr) {
568de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
569cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
570de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
571cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
572de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
573cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
5743f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
5758dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner    }
5768dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner  case '>':
577475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    switch (*CurPtr) {
578de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
579cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
580de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
581cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
5823f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
5838dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner    }
584de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
5854651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // TODO: Quoted identifiers (objc methods etc)
5864651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // local labels: [0-9][:]
5874651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // Forward/backward labels: [0-9][fb]
5884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // Integers, fp constants, character constants.
589a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  }
59066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands}
591