AsmLexer.cpp revision ca90dc6d295f7f6a5ef4240f26bcebe54276def5
18d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 28d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// 38d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// The LLVM Compiler Infrastructure 48d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// 58d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// This file is distributed under the University of Illinois Open Source 68d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// License. See LICENSE.TXT for details. 78d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// 88d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt//===----------------------------------------------------------------------===// 98d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// 108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// This class implements the lexer for assembly files. 118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// 128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt//===----------------------------------------------------------------------===// 138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/MC/MCParser/AsmLexer.h" 158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/Support/SMLoc.h" 168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/Support/MemoryBuffer.h" 178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "llvm/MC/MCAsmInfo.h" 188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <cerrno> 198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <cstdio> 208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <cstdlib> 218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtusing namespace llvm; 228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 238d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { 248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurBuf = NULL; 258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurPtr = NULL; 268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 288d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmLexer::~AsmLexer() { 298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtvoid AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { 328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurBuf = buf; 338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (ptr) 358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurPtr = ptr; 368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt else 378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurPtr = CurBuf->getBufferStart(); 388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt TokStart = 0; 408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// ReturnError - Set the error to the specified string at the specified 438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// location. This is defined to always return AsmToken::Error. 448d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt SetError(SMLoc::getFromPointer(Loc), Msg); 468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Error, StringRef(Loc, 0)); 488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtint AsmLexer::getNextChar() { 518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt char CurChar = *CurPtr++; 528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt switch (CurChar) { 538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt default: 548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (unsigned char)CurChar; 558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case 0: 568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // A nul character in the stream is either the end of the current buffer or 578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // a random nul in the file. Disambiguate that here. 588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurPtr-1 != CurBuf->getBufferEnd()) 598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return 0; // Just whitespace. 608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Otherwise, return end of file. 628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt --CurPtr; // Another call to lex will return EOF again. 638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return EOF; 648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 688d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexIdentifier() { 698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || 708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt *CurPtr == '.' || *CurPtr == '@') 718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Handle . as a special case. 748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurPtr == TokStart+1 && TokStart[0] == '.') 758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexSlash: Slash: / 818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// C-Style Comment: /* ... */ 828d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexSlash() { 838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt switch (*CurPtr) { 848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '*': break; // C style comment. 858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '/': return ++CurPtr, LexLineComment(); 868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); 878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // C Style comment. 908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; // skip the star. 918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (1) { 928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt int CurChar = getNextChar(); 938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt switch (CurChar) { 948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case EOF: 958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "unterminated comment"); 968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '*': 978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // End of the comment? 988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurPtr[0] != '/') break; 998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; // End the */. 1018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return LexToken(); 1028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexLineComment: Comment: #[^\n]* 1078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// : //[^\n]* 1088d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexLineComment() { 1098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // FIXME: This is broken if we happen to a comment at the end of a file, which 1108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // was .included, and which doesn't end with a newline. 1118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt int CurChar = getNextChar(); 1128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) 1138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurChar = getNextChar(); 1148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurChar == EOF) 1168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); 1178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); 1188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexDigit: First character is [0-9]. 1228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// Local Label: [0-9][:] 1238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// Forward/Backward Label: [0-9][fb] 1248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// Binary integer: 0b[01]+ 1258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// Octal integer: 0[0-7]+ 1268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// Hex integer: 0x[0-9a-fA-F]+ 1278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// Decimal integer: [1-9][0-9]* 1288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// TODO: FP literal. 1298d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexDigit() { 1308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Decimal integer: [1-9][0-9]* 1318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurPtr[-1] != '0') { 1328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (isdigit(*CurPtr)) 1338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 1348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef Result(TokStart, CurPtr - TokStart); 1368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt long long Value; 1388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (Result.getAsInteger(10, Value)) { 1398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // We have to handle minint_as_a_positive_value specially, because 1408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // - minint_as_a_positive_value = minint and it is valid. 1418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (Result == "9223372036854775808") 1428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt Value = -9223372036854775808ULL; 1438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt else 1448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "Invalid decimal number"); 1458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Integer, Result, Value); 1478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (*CurPtr == 'b') { 1508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 1518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // See if we actually have "0b" as part of something like "jmp 0b\n" 1528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (!isdigit(CurPtr[0])) { 1538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt --CurPtr; 1548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef Result(TokStart, CurPtr - TokStart); 1558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Integer, Result, 0); 1568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt const char *NumStart = CurPtr; 1588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (CurPtr[0] == '0' || CurPtr[0] == '1') 1598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 1608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Requires at least one binary digit. 1628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurPtr == NumStart) 1638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "Invalid binary number"); 1648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef Result(TokStart, CurPtr - TokStart); 1668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt long long Value; 1688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (Result.getAsInteger(2, Value)) 1698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "Invalid binary number"); 1708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Integer, Result, Value); 1728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (*CurPtr == 'x') { 1758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 1768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt const char *NumStart = CurPtr; 1778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (isxdigit(CurPtr[0])) 1788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 1798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Requires at least one hex digit. 1818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurPtr == NumStart) 1828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(CurPtr-2, "Invalid hexadecimal number"); 1838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt unsigned long long Result; 1858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 1868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "Invalid hexadecimal number"); 1878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), 1898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt (int64_t)Result); 1908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 1918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Must be an octal number, it starts with 0. 1938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (*CurPtr >= '0' && *CurPtr <= '7') 1948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 1958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef Result(TokStart, CurPtr - TokStart); 1978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt long long Value; 1988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (Result.getAsInteger(8, Value)) 1998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "Invalid octal number"); 2008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Integer, Result, Value); 2028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/// LexQuote: String: "..." 2058d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexQuote() { 2068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt int CurChar = getNextChar(); 2078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // TODO: does gas allow multiline string constants? 2088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (CurChar != '"') { 2098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurChar == '\\') { 2108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Allow \", etc. 2118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurChar = getNextChar(); 2128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 2138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (CurChar == EOF) 2158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "unterminated string constant"); 2168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt CurChar = getNextChar(); 2188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 2198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 2218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2238d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtStringRef AsmLexer::LexUntilEndOfStatement() { 2248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt TokStart = CurPtr; 2258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt while (!isAtStartOfComment(*CurPtr) && // Start of line comment. 2278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt *CurPtr != ';' && // End of statement marker. 2288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt *CurPtr != '\n' && 2298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt *CurPtr != '\r' && 2308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 2318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ++CurPtr; 2328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 2338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return StringRef(TokStart, CurPtr-TokStart); 2348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtbool AsmLexer::isAtStartOfComment(char Char) { 2378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // FIXME: This won't work for multi-character comment indicators like "//". 2388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return Char == *MAI.getCommentString(); 2398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2418d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtAsmToken AsmLexer::LexToken() { 2428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt TokStart = CurPtr; 2438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // This always consumes at least one character. 2448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt int CurChar = getNextChar(); 2458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (isAtStartOfComment(CurChar)) 2478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return LexLineComment(); 2488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt switch (CurChar) { 2508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt default: 2518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 2528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 2538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return LexIdentifier(); 2548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Unknown character, emit an error. 2568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ReturnError(TokStart, "invalid character in input"); 2578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 2588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case 0: 2598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case ' ': 2608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '\t': 2618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Ignore whitespace. 2628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return LexToken(); 2638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '\n': // FALL THROUGH. 2648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '\r': // FALL THROUGH. 2658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 2668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 2678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 2688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 2698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 2708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 2718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 2728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 2738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 2748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 2758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 2768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 2778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 2788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 2798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 2808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '=': 2818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (*CurPtr == '=') 2828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 2838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 2848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '|': 2858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (*CurPtr == '|') 2868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 2878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 2888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 2898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '&': 2908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (*CurPtr == '&') 2918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 2928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 2938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '!': 2948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt if (*CurPtr == '=') 2958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 2968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 2978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 2988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '/': return LexSlash(); 2998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 3008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '"': return LexQuote(); 3018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '0': case '1': case '2': case '3': case '4': 3028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '5': case '6': case '7': case '8': case '9': 3038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return LexDigit(); 3048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '<': 3058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt switch (*CurPtr) { 3068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 3078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef(TokStart, 2)); 3088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 3098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef(TokStart, 2)); 3108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 3118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef(TokStart, 2)); 3128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 3138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 3148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '>': 3158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt switch (*CurPtr) { 3168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 3178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef(TokStart, 2)); 3188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 3198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt StringRef(TokStart, 2)); 3208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 3218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 3228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // TODO: Quoted identifiers (objc methods etc) 3248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // local labels: [0-9][:] 3258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Forward/backward labels: [0-9][fb] 3268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt // Integers, fp constants, character constants. 3278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } 3288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt