AsmLexer.cpp revision ca90dc6d295f7f6a5ef4240f26bcebe54276def5
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/MC/MCParser/AsmLexer.h"
158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/Support/SMLoc.h"
168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/Support/MemoryBuffer.h"
178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/MC/MCAsmInfo.h"
188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <cerrno>
198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <cstdio>
208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <cstdlib>
218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtusing namespace llvm;
228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
238d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI)  {
248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  CurBuf = NULL;
258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  CurPtr = NULL;
268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
288d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmLexer::~AsmLexer() {
298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtvoid AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  CurBuf = buf;
338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (ptr)
358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    CurPtr = ptr;
368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  else
378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    CurPtr = CurBuf->getBufferStart();
388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  TokStart = 0;
408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// ReturnError - Set the error to the specified string at the specified
438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// location.  This is defined to always return AsmToken::Error.
448d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  SetError(SMLoc::getFromPointer(Loc), Msg);
468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return AsmToken(AsmToken::Error, StringRef(Loc, 0));
488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtint AsmLexer::getNextChar() {
518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  char CurChar = *CurPtr++;
528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  switch (CurChar) {
538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  default:
548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return (unsigned char)CurChar;
558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case 0:
568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // A nul character in the stream is either the end of the current buffer or
578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // a random nul in the file.  Disambiguate that here.
588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (CurPtr-1 != CurBuf->getBufferEnd())
598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return 0;  // Just whitespace.
608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // Otherwise, return end of file.
628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    --CurPtr;  // Another call to lex will return EOF again.
638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return EOF;
648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
688d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexIdentifier() {
698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt         *CurPtr == '.' || *CurPtr == '@')
718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    ++CurPtr;
728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Handle . as a special case.
748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (CurPtr == TokStart+1 && TokStart[0] == '.')
758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexSlash: Slash: /
818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///           C-Style Comment: /* ... */
828d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexSlash() {
838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  switch (*CurPtr) {
848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '*': break; // C style comment.
858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '/': return ++CurPtr, LexLineComment();
868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // C Style comment.
908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  ++CurPtr;  // skip the star.
918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  while (1) {
928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    int CurChar = getNextChar();
938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    switch (CurChar) {
948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case EOF:
958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ReturnError(TokStart, "unterminated comment");
968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case '*':
978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      // End of the comment?
988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      if (CurPtr[0] != '/') break;
998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      ++CurPtr;   // End the */.
1018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return LexToken();
1028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    }
1038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
1048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexLineComment: Comment: #[^\n]*
1078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///                        : //[^\n]*
1088d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexLineComment() {
1098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // FIXME: This is broken if we happen to a comment at the end of a file, which
1108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // was .included, and which doesn't end with a newline.
1118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  int CurChar = getNextChar();
1128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
1138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    CurChar = getNextChar();
1148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (CurChar == EOF)
1168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
1178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
1188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
1198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexDigit: First character is [0-9].
1228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///   Local Label: [0-9][:]
1238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///   Forward/Backward Label: [0-9][fb]
1248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///   Binary integer: 0b[01]+
1258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///   Octal integer: 0[0-7]+
1268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///   Hex integer: 0x[0-9a-fA-F]+
1278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt///   Decimal integer: [1-9][0-9]*
1288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// TODO: FP literal.
1298d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexDigit() {
1308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Decimal integer: [1-9][0-9]*
1318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (CurPtr[-1] != '0') {
1328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    while (isdigit(*CurPtr))
1338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      ++CurPtr;
1348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    StringRef Result(TokStart, CurPtr - TokStart);
1368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    long long Value;
1388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (Result.getAsInteger(10, Value)) {
1398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      // We have to handle minint_as_a_positive_value specially, because
1408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      // - minint_as_a_positive_value = minint and it is valid.
1418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      if (Result == "9223372036854775808")
1428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt        Value = -9223372036854775808ULL;
1438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      else
1448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt        return ReturnError(TokStart, "Invalid decimal number");
1458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    }
1468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Integer, Result, Value);
1478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
1488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (*CurPtr == 'b') {
1508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    ++CurPtr;
1518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // See if we actually have "0b" as part of something like "jmp 0b\n"
1528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (!isdigit(CurPtr[0])) {
1538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      --CurPtr;
1548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      StringRef Result(TokStart, CurPtr - TokStart);
1558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return AsmToken(AsmToken::Integer, Result, 0);
1568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    }
1578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    const char *NumStart = CurPtr;
1588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    while (CurPtr[0] == '0' || CurPtr[0] == '1')
1598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      ++CurPtr;
1608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // Requires at least one binary digit.
1628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (CurPtr == NumStart)
1638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ReturnError(TokStart, "Invalid binary number");
1648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    StringRef Result(TokStart, CurPtr - TokStart);
1668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    long long Value;
1688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (Result.getAsInteger(2, Value))
1698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ReturnError(TokStart, "Invalid binary number");
1708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Integer, Result, Value);
1728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
1738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (*CurPtr == 'x') {
1758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    ++CurPtr;
1768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    const char *NumStart = CurPtr;
1778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    while (isxdigit(CurPtr[0]))
1788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      ++CurPtr;
1798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // Requires at least one hex digit.
1818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (CurPtr == NumStart)
1828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ReturnError(CurPtr-2, "Invalid hexadecimal number");
1838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    unsigned long long Result;
1858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
1868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ReturnError(TokStart, "Invalid hexadecimal number");
1878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
1898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                    (int64_t)Result);
1908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
1918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Must be an octal number, it starts with 0.
1938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  while (*CurPtr >= '0' && *CurPtr <= '7')
1948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    ++CurPtr;
1958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
1968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  StringRef Result(TokStart, CurPtr - TokStart);
1978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  long long Value;
1988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (Result.getAsInteger(8, Value))
1998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return ReturnError(TokStart, "Invalid octal number");
2008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return AsmToken(AsmToken::Integer, Result, Value);
2028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexQuote: String: "..."
2058d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexQuote() {
2068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  int CurChar = getNextChar();
2078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // TODO: does gas allow multiline string constants?
2088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  while (CurChar != '"') {
2098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (CurChar == '\\') {
2108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      // Allow \", etc.
2118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      CurChar = getNextChar();
2128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    }
2138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (CurChar == EOF)
2158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ReturnError(TokStart, "unterminated string constant");
2168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    CurChar = getNextChar();
2188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
2198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
2218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2238d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtStringRef AsmLexer::LexUntilEndOfStatement() {
2248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  TokStart = CurPtr;
2258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
2278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt          *CurPtr != ';' &&  // End of statement marker.
2288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt         *CurPtr != '\n' &&
2298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt         *CurPtr != '\r' &&
2308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt         (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
2318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    ++CurPtr;
2328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
2338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return StringRef(TokStart, CurPtr-TokStart);
2348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtbool AsmLexer::isAtStartOfComment(char Char) {
2378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // FIXME: This won't work for multi-character comment indicators like "//".
2388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return Char == *MAI.getCommentString();
2398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
2408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2418d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexToken() {
2428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  TokStart = CurPtr;
2438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // This always consumes at least one character.
2448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  int CurChar = getNextChar();
2458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (isAtStartOfComment(CurChar))
2478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return LexLineComment();
2488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  switch (CurChar) {
2508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  default:
2518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
2528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
2538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return LexIdentifier();
2548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
2558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // Unknown character, emit an error.
2568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return ReturnError(TokStart, "invalid character in input");
2578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
2588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case 0:
2598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case ' ':
2608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '\t':
2618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    // Ignore whitespace.
2628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return LexToken();
2638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '\n': // FALL THROUGH.
2648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '\r': // FALL THROUGH.
2658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
2668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
2678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
2688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
2698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
2708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
2718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
2728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
2738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
2748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
2758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
2768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
2778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
2788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
2798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
2808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '=':
2818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (*CurPtr == '=')
2828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
2838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
2848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '|':
2858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (*CurPtr == '|')
2868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
2878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
2888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
2898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '&':
2908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (*CurPtr == '&')
2918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
2928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
2938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '!':
2948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    if (*CurPtr == '=')
2958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt      return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
2968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
2978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
2988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '/': return LexSlash();
2998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
3008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '"': return LexQuote();
3018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '0': case '1': case '2': case '3': case '4':
3028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '5': case '6': case '7': case '8': case '9':
3038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return LexDigit();
3048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '<':
3058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    switch (*CurPtr) {
3068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
3078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                        StringRef(TokStart, 2));
3088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
3098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                        StringRef(TokStart, 2));
3108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
3118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                        StringRef(TokStart, 2));
3128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
3138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    }
3148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  case '>':
3158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    switch (*CurPtr) {
3168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
3178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                        StringRef(TokStart, 2));
3188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
3198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                        StringRef(TokStart, 2));
3208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
3218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    }
3228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
3238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // TODO: Quoted identifiers (objc methods etc)
3248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // local labels: [0-9][:]
3258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Forward/backward labels: [0-9][fb]
3268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Integers, fp constants, character constants.
3278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  }
3288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
3298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt