AsmLexer.cpp revision 50e75bfc29269def44981ab5f109334d95f55007
1a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//
3a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//                     The LLVM Compiler Infrastructure
4a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//
5a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This file is distributed under the University of Illinois Open Source
6a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// License. See LICENSE.TXT for details.
7a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//
8a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===//
9a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//
10a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This class implements the lexer for assembly files.
11a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//
12a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===//
13a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
14be343b3ca3f53d5d5e29f3591af8b9bb831daa98Chris Lattner#include "llvm/MC/MCParser/AsmLexer.h"
159823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby#include "llvm/MC/MCAsmInfo.h"
16d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/MemoryBuffer.h"
17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/SMLoc.h"
18476b242fe7a61e5f9ac6214b0bc5c680d24f152eNick Lewycky#include <cctype>
194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner#include <cerrno>
2066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands#include <cstdio>
214506bd2cfd3e75535670890031eec26e216993b8Chris Lattner#include <cstdlib>
22a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerusing namespace llvm;
23a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
24fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean CallananAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI)  {
25fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  CurBuf = NULL;
26fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  CurPtr = NULL;
276148225b9590f18fcb6a1d3151d3158b316965e0Jim Grosbach  isAtStartOfLine = true;
28faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner}
29faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner
30faf32c102db556e367af1e5bca7359160840d2d0Chris LattnerAsmLexer::~AsmLexer() {
31a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner}
32a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
33fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callananvoid AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
34fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  CurBuf = buf;
35de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
36fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  if (ptr)
37fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan    CurPtr = ptr;
38fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  else
39fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan    CurPtr = CurBuf->getBufferStart();
40de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
41fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  TokStart = 0;
42fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan}
43fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan
444651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// ReturnError - Set the error to the specified string at the specified
453f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar/// location.  This is defined to always return AsmToken::Error.
46cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
4779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan  SetError(SMLoc::getFromPointer(Loc), Msg);
48de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::Error, StringRef(Loc, 0));
504651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
52a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerint AsmLexer::getNextChar() {
53a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  char CurChar = *CurPtr++;
54a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  switch (CurChar) {
55a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  default:
56a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    return (unsigned char)CurChar;
57fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan  case 0:
58a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // A nul character in the stream is either the end of the current buffer or
59a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // a random nul in the file.  Disambiguate that here.
60a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    if (CurPtr-1 != CurBuf->getBufferEnd())
61a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner      return 0;  // Just whitespace.
62de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
63a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // Otherwise, return end of file.
64de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    --CurPtr;  // Another call to lex will return EOF again.
65a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    return EOF;
66a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  }
67a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner}
68a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner
694f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
704f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar///
714f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// The leading integral digit sequence and dot should have already been
724f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed, some or all of the fractional digit sequence *can* have been
734f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed.
744f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel DunbarAsmToken AsmLexer::LexFloatLiteral() {
754f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // Skip the fractional digit sequence.
764f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  while (isdigit(*CurPtr))
774f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    ++CurPtr;
784f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar
794f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // Check for exponent; we intentionally accept a slighlty wider set of
804f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // literals here and rely on the upstream client to reject invalid ones (e.g.,
814f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  // "1e+").
824f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  if (*CurPtr == 'e' || *CurPtr == 'E') {
834f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    ++CurPtr;
844f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    if (*CurPtr == '-' || *CurPtr == '+')
854f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      ++CurPtr;
864f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    while (isdigit(*CurPtr))
874f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      ++CurPtr;
884f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  }
894f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar
904f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar  return AsmToken(AsmToken::Real,
914f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar                  StringRef(TokStart, CurPtr - TokStart));
924f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar}
934f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar
945fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
/// Return true if \p c may appear inside an identifier: an alphanumeric
/// character or one of '_', '$', '.', '@'.
static bool IsIdentifierChar(char c) {
  // isalnum() requires a value representable as unsigned char (or EOF); a
  // plain char holding a byte >= 0x80 may be negative, so convert first.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || c == '@';
}
98cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexIdentifier() {
9954f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  // Check for floating point literals.
10054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
1014f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    // Disambiguate a .1243foo identifier from a floating literal.
10254f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    while (isdigit(*CurPtr))
10354f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar      ++CurPtr;
1044f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar    if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
1054f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      return LexFloatLiteral();
10654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  }
10754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar
10854f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar  while (IsIdentifierChar(*CurPtr))
1094651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    ++CurPtr;
110de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
111d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner  // Handle . as a special case.
1125fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar  if (CurPtr == TokStart+1 && TokStart[0] == '.')
1135fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar    return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
114de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
1153f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
1164651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
1174651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
1184651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexSlash: Slash: /
1194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///           C-Style Comment: /* ... */
120cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexSlash() {
121383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  switch (*CurPtr) {
122383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  case '*': break; // C style comment.
123383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  case '/': return ++CurPtr, LexLineComment();
124bdf90d679befafe70b93082042266ba58a9ad0b2Daniel Dunbar  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
125383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar  }
1264651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
1274651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // C Style comment.
1284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  ++CurPtr;  // skip the star.
1294651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  while (1) {
1304651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    int CurChar = getNextChar();
1314651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    switch (CurChar) {
1324651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    case EOF:
13327aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner      return ReturnError(TokStart, "unterminated comment");
1344651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    case '*':
1354651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      // End of the comment?
1364651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      if (CurPtr[0] != '/') break;
137de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
1384651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      ++CurPtr;   // End the */.
1394651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      return LexToken();
1404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    }
1414651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
1424651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
1434651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
144383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// LexLineComment: Comment: #[^\n]*
145383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar///                        : //[^\n]*
146cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexLineComment() {
147cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar  // FIXME: This is broken if we happen to a comment at the end of a file, which
148cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar  // was .included, and which doesn't end with a newline.
1494651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  int CurChar = getNextChar();
1500ecd825e54f2235c133b44c967a612551633106cChris Lattner  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
1514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    CurChar = getNextChar();
152de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
1534651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  if (CurChar == EOF)
1543f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
1553f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
1564651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
1574651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
/// Advance \p CurPtr over an "LL" suffix, and then over a "ULL" suffix, when
/// either is present at the cursor.  Other text is left untouched.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  // strncmp stops at a nul, so neither comparison reads past a terminator.
  if (strncmp(CurPtr, "LL", 2) == 0)
    CurPtr += 2;
  if (strncmp(CurPtr, "ULL", 3) == 0)
    CurPtr += 3;
}
164a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner
/// Look ahead to search for first non-hex digit, if it's [hH], then we treat
/// the integer as a hexadecimal, possibly with leading zeroes.
///
/// On return \p CurPtr points at the [hH] suffix when one was found.
/// Otherwise it points at the first hex letter (a-f/A-F) seen, or at the first
/// non-digit character if there was no such letter.
///
/// Returns 16 when the [hH] suffix is present and \p DefaultRadix otherwise.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = 0;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // Convert to unsigned char first: passing a negative char to the <cctype>
    // classifiers is undefined behaviour.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (!isxdigit(C))
      break;
    // Remember where the decimal prefix ends, i.e. the first a-f/A-F.
    if (!FirstHex && !isdigit(C))
      FirstHex = LookAhead;
  }
  const bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = (isHex || !FirstHex) ? LookAhead : FirstHex;
  return isHex ? 16 : DefaultRadix;
}
187d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
1884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexDigit: First character is [0-9].
1894651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///   Local Label: [0-9][:]
190e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola///   Forward/Backward Label: [0-9][fb]
191e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola///   Binary integer: 0b[01]+
1924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///   Octal integer: 0[0-7]+
193d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
1944651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner///   Decimal integer: [1-9][0-9]*
195cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexDigit() {
1964651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // Decimal integer: [1-9][0-9]*
197facb34b41cea284b5a0b4992ff619e5cfd5e6a22Daniel Dunbar  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
198d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    unsigned Radix = doLookAhead(CurPtr, 10);
199e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    bool isHex = Radix == 16;
20054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    // Check for floating point literals.
201e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
20254f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar      ++CurPtr;
2034f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar      return LexFloatLiteral();
20454f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar    }
20554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar
206d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    StringRef Result(TokStart, CurPtr - TokStart);
207a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner
208d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    long long Value;
209d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    if (Result.getAsInteger(Radix, Value)) {
2107ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner      // Allow positive values that are too large to fit into a signed 64-bit
2117ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner      // integer, but that do fit in an unsigned one, we just convert them over.
2127ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner      unsigned long long UValue;
213d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier      if (Result.getAsInteger(Radix, UValue))
214e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola        return ReturnError(TokStart, !isHex ? "invalid decimal number" :
215d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier                           "invalid hexdecimal number");
2167ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner      Value = (long long)UValue;
2173a151be8d5f85ca12737f5664ef91192014a4f77Chris Lattner    }
218de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
219d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    // Consume the [bB][hH].
220d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    if (Radix == 2 || Radix == 16)
221d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier      ++CurPtr;
222d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
223a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
224a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // suffixes on integer literals.
225a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    SkipIgnoredIntegerSuffix(CurPtr);
226de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
227d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    return AsmToken(AsmToken::Integer, Result, Value);
2284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
229de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2304651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  if (*CurPtr == 'b') {
231e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    ++CurPtr;
232e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    // See if we actually have "0b" as part of something like "jmp 0b\n"
233e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    if (!isdigit(CurPtr[0])) {
234e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola      --CurPtr;
235e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola      StringRef Result(TokStart, CurPtr - TokStart);
236e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola      return AsmToken(AsmToken::Integer, Result, 0);
237e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    }
238e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    const char *NumStart = CurPtr;
2394651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    while (CurPtr[0] == '0' || CurPtr[0] == '1')
2404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      ++CurPtr;
241de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2424651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    // Requires at least one binary digit.
2434651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    if (CurPtr == NumStart)
24405f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(TokStart, "invalid binary number");
245de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
246d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    StringRef Result(TokStart, CurPtr - TokStart);
247de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
248d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    long long Value;
249a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    if (Result.substr(2).getAsInteger(2, Value))
25005f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(TokStart, "invalid binary number");
251de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
252a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
253a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // suffixes on integer literals.
254a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    SkipIgnoredIntegerSuffix(CurPtr);
255de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
256d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner    return AsmToken(AsmToken::Integer, Result, Value);
2574651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
258de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2594651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  if (*CurPtr == 'x') {
2604651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    ++CurPtr;
2614651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    const char *NumStart = CurPtr;
2624651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    while (isxdigit(CurPtr[0]))
2634651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      ++CurPtr;
264de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    // Requires at least one hex digit.
2664651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    if (CurPtr == NumStart)
26705f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(CurPtr-2, "invalid hexadecimal number");
26803949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner
26903949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner    unsigned long long Result;
27003949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner    if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
27105f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher      return ReturnError(TokStart, "invalid hexadecimal number");
272de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
273d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    // Consume the optional [hH].
274d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    if (*CurPtr == 'h' || *CurPtr == 'H')
275d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier      ++CurPtr;
276d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
277a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
278a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    // suffixes on integer literals.
279a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner    SkipIgnoredIntegerSuffix(CurPtr);
280de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
2813f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
28203949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner                    (int64_t)Result);
2834651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  }
284de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
28550e75bfc29269def44981ab5f109334d95f55007Matt Beaumont-Gay  // Either octal or hexadecimal.
286d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner  long long Value;
287d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier  unsigned Radix = doLookAhead(CurPtr, 8);
288e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola  bool isHex = Radix == 16;
289d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier  StringRef Result(TokStart, CurPtr - TokStart);
290d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier  if (Result.getAsInteger(Radix, Value))
291e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola    return ReturnError(TokStart, !isHex ? "invalid octal number" :
29253e5bb70db34b736eed01c1580af1afd7314a2d8Chad Rosier                       "invalid hexdecimal number");
293de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
294e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola  // Consume the [hH].
295e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola  if (Radix == 16)
296d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier    ++CurPtr;
297d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier
298a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
299a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner  // suffixes on integer literals.
300a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner  SkipIgnoredIntegerSuffix(CurPtr);
301de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
302d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner  return AsmToken(AsmToken::Integer, Result, Value);
3034651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner}
3044651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
3057529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky/// LexSingleQuote: Integer: 'b'
3067529b164104f802b59fbf96f5a88d9709ac3ff1aRoman DivackyAsmToken AsmLexer::LexSingleQuote() {
3077529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  int CurChar = getNextChar();
3087529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3097529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (CurChar == '\\')
3107529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    CurChar = getNextChar();
3117529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3127529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (CurChar == EOF)
3137529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    return ReturnError(TokStart, "unterminated single quote");
3147529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3157529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  CurChar = getNextChar();
3167529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3177529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (CurChar != '\'')
3187529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    return ReturnError(TokStart, "single quote way too long");
3197529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3207529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  // The idea here being that 'c' is basically just an integral
3217529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  // constant.
3227529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
3237529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  long long Value;
3247529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3257529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  if (Res.startswith("\'\\")) {
3267529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    char theChar = Res[2];
3277529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    switch (theChar) {
3287529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      default: Value = theChar; break;
3297529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case '\'': Value = '\''; break;
3307529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case 't': Value = '\t'; break;
3317529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case 'n': Value = '\n'; break;
3327529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky      case 'b': Value = '\b'; break;
3337529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    }
3347529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  } else
3357529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky    Value = TokStart[1];
3367529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
337de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  return AsmToken(AsmToken::Integer, Res, Value);
3387529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky}
3397529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
3407529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky
34110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner/// LexQuote: String: "..."
342cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexQuote() {
34310a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  int CurChar = getNextChar();
34410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  // TODO: does gas allow multiline string constants?
34510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  while (CurChar != '"') {
34610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner    if (CurChar == '\\') {
34710a907d70fb54c40eecabb889e81c79b44092221Chris Lattner      // Allow \", etc.
34810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner      CurChar = getNextChar();
34910a907d70fb54c40eecabb889e81c79b44092221Chris Lattner    }
350de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
35114ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner    if (CurChar == EOF)
35214ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner      return ReturnError(TokStart, "unterminated string constant");
35310a907d70fb54c40eecabb889e81c79b44092221Chris Lattner
35410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner    CurChar = getNextChar();
35510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  }
356de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
3573f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
35810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner}
35910a907d70fb54c40eecabb889e81c79b44092221Chris Lattner
360ff4bc460c52c1f285d8a56da173641bf92d49e3fChris LattnerStringRef AsmLexer::LexUntilEndOfStatement() {
361ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner  TokStart = CurPtr;
362ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner
363d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  while (!isAtStartOfComment(*CurPtr) &&    // Start of line comment.
364d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach         !isAtStatementSeparator(CurPtr) && // End of statement marker.
365ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner         *CurPtr != '\n' &&
366ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner         *CurPtr != '\r' &&
3679823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby         (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
368ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner    ++CurPtr;
3699823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby  }
370ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner  return StringRef(TokStart, CurPtr-TokStart);
371ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner}
3724651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner
373f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin EnderbyStringRef AsmLexer::LexUntilEndOfLine() {
374f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  TokStart = CurPtr;
375f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby
376f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  while (*CurPtr != '\n' &&
377f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby         *CurPtr != '\r' &&
378f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby         (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
379f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    ++CurPtr;
380f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  }
381f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  return StringRef(TokStart, CurPtr-TokStart);
382f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby}
383f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby
384b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderbybool AsmLexer::isAtStartOfComment(char Char) {
385cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner  // FIXME: This won't work for multi-character comment indicators like "//".
386cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner  return Char == *MAI.getCommentString();
387b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby}
388b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby
389d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbachbool AsmLexer::isAtStatementSeparator(const char *Ptr) {
390d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  return strncmp(Ptr, MAI.getSeparatorString(),
391d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach                 strlen(MAI.getSeparatorString())) == 0;
392d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach}
393d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach
394cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexToken() {
395a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  TokStart = CurPtr;
396a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  // This always consumes at least one character.
397a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  int CurChar = getNextChar();
398de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
399f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  if (isAtStartOfComment(CurChar)) {
400f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    // If this comment starts with a '#', then return the Hash token and let
401f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    // the assembler parser see if it can be parsed as a cpp line filename
402f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    // comment. We do this only if we are at the start of a line.
403f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    if (CurChar == '#' && isAtStartOfLine)
404f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby      return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
405f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    isAtStartOfLine = true;
406b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby    return LexLineComment();
407f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby  }
408d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  if (isAtStatementSeparator(TokStart)) {
409d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach    CurPtr += strlen(MAI.getSeparatorString()) - 1;
410d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach    return AsmToken(AsmToken::EndOfStatement,
411d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach                    StringRef(TokStart, strlen(MAI.getSeparatorString())));
412d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  }
4139823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby
41470796ca867132fd8c767301061afb9760cd69167Jim Grosbach  // If we're missing a newline at EOF, make sure we still get an
41570796ca867132fd8c767301061afb9760cd69167Jim Grosbach  // EndOfStatement token before the Eof token.
41670796ca867132fd8c767301061afb9760cd69167Jim Grosbach  if (CurChar == EOF && !isAtStartOfLine) {
41770796ca867132fd8c767301061afb9760cd69167Jim Grosbach    isAtStartOfLine = true;
41870796ca867132fd8c767301061afb9760cd69167Jim Grosbach    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
41970796ca867132fd8c767301061afb9760cd69167Jim Grosbach  }
42070796ca867132fd8c767301061afb9760cd69167Jim Grosbach
42170796ca867132fd8c767301061afb9760cd69167Jim Grosbach  isAtStartOfLine = false;
422a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  switch (CurChar) {
423a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  default:
4245fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
4255fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
4264651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner      return LexIdentifier();
427de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
428a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner    // Unknown character, emit an error.
42927aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner    return ReturnError(TokStart, "invalid character in input");
4303f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
431a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  case 0:
432a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  case ' ':
433a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  case '\t':
4347b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    if (SkipSpace) {
4357b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      // Ignore whitespace.
4367b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      return LexToken();
4377b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    } else {
4387b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      int len = 1;
4397b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      while (*CurPtr==' ' || *CurPtr=='\t') {
4407b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd        CurPtr++;
4417b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd        len++;
4427b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      }
4437b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd      return AsmToken(AsmToken::Space, StringRef(TokStart, len));
4447b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd    }
4454651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '\n': // FALL THROUGH.
446d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach  case '\r':
447f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby    isAtStartOfLine = true;
448d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
4493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
4503f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
4513f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
4523f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
4533f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
4543f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
455fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
456fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
457fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
458fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
4593f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
4603f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
4615fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
462924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
463653664471333f316020e96dd3d664f4984f66a65Rafael Espindola  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
464de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '=':
465475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '=')
4663f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
4673f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
468de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '|':
469475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '|')
4703f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
4713f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
4723f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
473de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '&':
474475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '&')
4753f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
4763f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
477de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach  case '!':
478475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    if (*CurPtr == '=')
4793f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar      return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
4803f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
4817b4608dfa018455021050ccd31d3c49aaecf7ff6Kevin Enderby  case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
4824651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '/': return LexSlash();
4839823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
4847529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky  case '\'': return LexSingleQuote();
48510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner  case '"': return LexQuote();
4864651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '0': case '1': case '2': case '3': case '4':
4874651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  case '5': case '6': case '7': case '8': case '9':
4884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner    return LexDigit();
4898dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner  case '<':
490475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    switch (*CurPtr) {
491de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
492cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
493de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
494cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
495de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
496cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
4973f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
4988dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner    }
4998dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner  case '>':
500475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar    switch (*CurPtr) {
501de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
502cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
503de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach    case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
504cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar                                        StringRef(TokStart, 2));
5053f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar    default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
5068dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner    }
507de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach
5084651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // TODO: Quoted identifiers (objc methods etc)
5094651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // local labels: [0-9][:]
5104651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // Forward/backward labels: [0-9][fb]
5114651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner  // Integers, fp constants, character constants.
512a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner  }
51366b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands}
514