1a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 3a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// The LLVM Compiler Infrastructure 4a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 5a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This file is distributed under the University of Illinois Open Source 6a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// License. See LICENSE.TXT for details. 7a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 8a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===// 9a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 10a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This class implements the lexer for assembly files. 11a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 12a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===// 13a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 14be343b3ca3f53d5d5e29f3591af8b9bb831daa98Chris Lattner#include "llvm/MC/MCParser/AsmLexer.h" 159823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby#include "llvm/MC/MCAsmInfo.h" 16d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/MemoryBuffer.h" 17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/SMLoc.h" 18476b242fe7a61e5f9ac6214b0bc5c680d24f152eNick Lewycky#include <cctype> 194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner#include <cerrno> 2066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands#include <cstdio> 214506bd2cfd3e75535670890031eec26e216993b8Chris Lattner#include <cstdlib> 22a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerusing namespace llvm; 23a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 24fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean CallananAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { 25dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CurPtr = nullptr; 266148225b9590f18fcb6a1d3151d3158b316965e0Jim Grosbach isAtStartOfLine = true; 2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); 28faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner} 29faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner 30faf32c102db556e367af1e5bca7359160840d2d0Chris LattnerAsmLexer::~AsmLexer() { 31a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner} 32a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 33cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesvoid AsmLexer::setBuffer(StringRef Buf, const char *ptr) { 34cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines CurBuf = Buf; 35de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 36fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan if (ptr) 37fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = ptr; 38fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan else 39cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines CurPtr = CurBuf.begin(); 40de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 41dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines TokStart = nullptr; 42fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan} 43fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan 444651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// ReturnError - Set the error to the specified string at the specified 453f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar/// location. This is defined to always return AsmToken::Error. 46cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 4779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan SetError(SMLoc::getFromPointer(Loc), Msg); 48de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Error, StringRef(Loc, 0)); 504651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 52a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerint AsmLexer::getNextChar() { 53a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner char CurChar = *CurPtr++; 54a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner switch (CurChar) { 55a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner default: 56a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return (unsigned char)CurChar; 57fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan case 0: 58a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // A nul character in the stream is either the end of the current buffer or 59a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // a random nul in the file. Disambiguate that here. 60cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (CurPtr - 1 != CurBuf.end()) 61a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return 0; // Just whitespace. 62de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 63a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // Otherwise, return end of file. 64de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach --CurPtr; // Another call to lex will return EOF again. 65a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return EOF; 66a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner } 67a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner} 68a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 694f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? 704f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// 714f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// The leading integral digit sequence and dot should have already been 724f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed, some or all of the fractional digit sequence *can* have been 734f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed. 744f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel DunbarAsmToken AsmLexer::LexFloatLiteral() { 754f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Skip the fractional digit sequence. 764f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar while (isdigit(*CurPtr)) 774f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 784f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 794f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Check for exponent; we intentionally accept a slighlty wider set of 804f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // literals here and rely on the upstream client to reject invalid ones (e.g., 814f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // "1e+"). 824f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == 'e' || *CurPtr == 'E') { 834f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 844f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == '-' || *CurPtr == '+') 854f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 864f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar while (isdigit(*CurPtr)) 874f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 884f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar } 894f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 904f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return AsmToken(AsmToken::Real, 914f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar StringRef(TokStart, CurPtr - TokStart)); 924f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar} 934f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 94337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ 95337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// while making sure there are enough actual digits around for the constant to 96337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// be valid. 97337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// 98337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed 99337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover/// before we get here. 100337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim NorthoverAsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { 101337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && 102337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover "unexpected parse state in floating hex"); 103337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover bool NoFracDigits = true; 104337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 105337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover // Skip the fractional part if there is one 106337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover if (*CurPtr == '.') { 107337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover ++CurPtr; 108337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 109337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover const char *FracStart = CurPtr; 110337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover while (isxdigit(*CurPtr)) 111337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover ++CurPtr; 112337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 113337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover NoFracDigits = CurPtr == FracStart; 114337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover } 115337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 116337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover if (NoIntDigits && NoFracDigits) 117337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 118337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover "expected at least one significand digit"); 119337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 120337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover // Make sure we do have some kind of proper exponent part 121337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover if (*CurPtr != 'p' && *CurPtr != 'P') 122337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 123337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover "expected exponent part 'p'"); 124337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover ++CurPtr; 125337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 126337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover if (*CurPtr == '+' || *CurPtr == '-') 127337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover ++CurPtr; 128337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 129337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover // N.b. exponent digits are *not* hex 130337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover const char *ExpStart = CurPtr; 131337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover while (isdigit(*CurPtr)) 132337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover ++CurPtr; 133337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 134337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover if (CurPtr == ExpStart) 135337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 136337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover "expected at least one exponent digit"); 137337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 138337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 139337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover} 140337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 14122f9dd4591e8af6d6feed10a4b6e11a784582edcHans Wennborg/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* 14236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool IsIdentifierChar(char c, bool AllowAt) { 14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return isalnum(c) || c == '_' || c == '$' || c == '.' || 14436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (c == '@' && AllowAt) || c == '?'; 14554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar} 146cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexIdentifier() { 14754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Check for floating point literals. 14854f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { 1494f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Disambiguate a .1243foo identifier from a floating literal. 15054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar while (isdigit(*CurPtr)) 15154f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar ++CurPtr; 15236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (*CurPtr == 'e' || *CurPtr == 'E' || 15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) 1544f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return LexFloatLiteral(); 15554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar } 15654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar 15736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) 1584651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 159de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 160d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner // Handle . as a special case. 1615fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar if (CurPtr == TokStart+1 && TokStart[0] == '.') 1625fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 163de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1643f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 1654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1664651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 1674651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexSlash: Slash: / 1684651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// C-Style Comment: /* ... */ 169cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexSlash() { 170383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar switch (*CurPtr) { 171383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar case '*': break; // C style comment. 172383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar case '/': return ++CurPtr, LexLineComment(); 173bdf90d679befafe70b93082042266ba58a9ad0b2Daniel Dunbar default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); 174383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar } 1754651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 1764651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // C Style comment. 1774651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; // skip the star. 1784651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (1) { 1794651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner int CurChar = getNextChar(); 1804651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner switch (CurChar) { 1814651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case EOF: 18227aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner return ReturnError(TokStart, "unterminated comment"); 1834651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '*': 1844651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // End of the comment? 1854651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr[0] != '/') break; 186de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1874651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; // End the */. 1884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexToken(); 1894651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 1904651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 1914651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 193383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// LexLineComment: Comment: #[^\n]* 194383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// : //[^\n]* 195cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexLineComment() { 196cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar // FIXME: This is broken if we happen to a comment at the end of a file, which 197cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar // was .included, and which doesn't end with a newline. 1984651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner int CurChar = getNextChar(); 1990ecd825e54f2235c133b44c967a612551633106cChris Lattner while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 2004651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner CurChar = getNextChar(); 201de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2024651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurChar == EOF) 203cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 204cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); 2054651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 2064651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 207a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattnerstatic void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 208ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach // Skip ULL, UL, U, L and LL suffices. 209ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach if (CurPtr[0] == 'U') 210ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach ++CurPtr; 211ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach if (CurPtr[0] == 'L') 212ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach ++CurPtr; 213ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach if (CurPtr[0] == 'L') 214ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach ++CurPtr; 215a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner} 216a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner 217d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier// Look ahead to search for first non-hex digit, if it's [hH], then we treat the 218d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier// integer as a hexadecimal, possibly with leading zeroes. 219d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosierstatic unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { 220dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const char *FirstHex = nullptr; 221d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier const char *LookAhead = CurPtr; 222d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier while (1) { 223d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (isdigit(*LookAhead)) { 224d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++LookAhead; 225d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } else if (isxdigit(*LookAhead)) { 226d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (!FirstHex) 227d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier FirstHex = LookAhead; 228d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++LookAhead; 229d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } else { 230d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier break; 231d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } 232d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } 233d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; 234e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; 235d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (isHex) 236d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier return 16; 237d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier return DefaultRadix; 238d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier} 239d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 24036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic AsmToken intToken(StringRef Ref, APInt &Value) 24136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines{ 24236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Value.isIntN(64)) 24336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return AsmToken(AsmToken::Integer, Ref, Value); 24436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return AsmToken(AsmToken::BigNum, Ref, Value); 24536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 24636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2474651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexDigit: First character is [0-9]. 2484651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Local Label: [0-9][:] 249e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola/// Forward/Backward Label: [0-9][fb] 250e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola/// Binary integer: 0b[01]+ 2514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Octal integer: 0[0-7]+ 252d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 2534651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Decimal integer: [1-9][0-9]* 254cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexDigit() { 2554651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Decimal integer: [1-9][0-9]* 256facb34b41cea284b5a0b4992ff619e5cfd5e6a22Daniel Dunbar if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 257d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier unsigned Radix = doLookAhead(CurPtr, 10); 258e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola bool isHex = Radix == 16; 25954f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Check for floating point literals. 260e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { 26154f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar ++CurPtr; 2624f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return LexFloatLiteral(); 26354f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar } 26454f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar 265d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner StringRef Result(TokStart, CurPtr - TokStart); 266a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner 26736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines APInt Value(128, 0, true); 26836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Result.getAsInteger(Radix, Value)) 26936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return ReturnError(TokStart, !isHex ? "invalid decimal number" : 270d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier "invalid hexdecimal number"); 271de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 272d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier // Consume the [bB][hH]. 273d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Radix == 2 || Radix == 16) 274d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 275d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 276ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach // The darwin/x86 (and x86-64) assembler accepts and ignores type 277ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach // suffices on integer literals. 278a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 279de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return intToken(Result, Value); 2814651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 282de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2834651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (*CurPtr == 'b') { 284e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola ++CurPtr; 285e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola // See if we actually have "0b" as part of something like "jmp 0b\n" 286e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (!isdigit(CurPtr[0])) { 287e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola --CurPtr; 288e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola StringRef Result(TokStart, CurPtr - TokStart); 289e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return AsmToken(AsmToken::Integer, Result, 0); 290e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola } 291e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola const char *NumStart = CurPtr; 2924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (CurPtr[0] == '0' || CurPtr[0] == '1') 2934651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 294de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2954651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Requires at least one binary digit. 2964651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr == NumStart) 29705f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid binary number"); 298de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 299d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner StringRef Result(TokStart, CurPtr - TokStart); 300de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines APInt Value(128, 0, true); 302a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner if (Result.substr(2).getAsInteger(2, Value)) 30305f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid binary number"); 304de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 305a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 306a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 307a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 308de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 30936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return intToken(Result, Value); 3104651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 311de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 3124651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (*CurPtr == 'x') { 3134651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 3144651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner const char *NumStart = CurPtr; 3154651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (isxdigit(CurPtr[0])) 3164651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 317de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 318337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be 319337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover // diagnosed by LexHexFloatLiteral). 320337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') 321337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover return LexHexFloatLiteral(NumStart == CurPtr); 322337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover 323337439d12d2e2a9e820e0aeee261bbdb935fc0a5Tim Northover // Otherwise requires at least one hex digit. 3244651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr == NumStart) 32505f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(CurPtr-2, "invalid hexadecimal number"); 32603949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner 32736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines APInt Result(128, 0); 32803949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 32905f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid hexadecimal number"); 330de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 331d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier // Consume the optional [hH]. 332d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (*CurPtr == 'h' || *CurPtr == 'H') 333d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 334d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 335a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 336a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 337a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 338de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 3404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 341de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 34250e75bfc29269def44981ab5f109334d95f55007Matt Beaumont-Gay // Either octal or hexadecimal. 34336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines APInt Value(128, 0, true); 344d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier unsigned Radix = doLookAhead(CurPtr, 8); 345e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola bool isHex = Radix == 16; 346d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier StringRef Result(TokStart, CurPtr - TokStart); 347d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, Value)) 348e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return ReturnError(TokStart, !isHex ? "invalid octal number" : 34953e5bb70db34b736eed01c1580af1afd7314a2d8Chad Rosier "invalid hexdecimal number"); 350de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 351e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola // Consume the [hH]. 352e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (Radix == 16) 353d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 354d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 355a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 356a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 357a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 358de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 35936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return intToken(Result, Value); 3604651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 3614651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 3627529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky/// LexSingleQuote: Integer: 'b' 3637529b164104f802b59fbf96f5a88d9709ac3ff1aRoman DivackyAsmToken AsmLexer::LexSingleQuote() { 3647529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky int CurChar = getNextChar(); 3657529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3667529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar == '\\') 3677529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky CurChar = getNextChar(); 3687529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3697529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar == EOF) 3707529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky return ReturnError(TokStart, "unterminated single quote"); 3717529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3727529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky CurChar = getNextChar(); 3737529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3747529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar != '\'') 3757529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky return ReturnError(TokStart, "single quote way too long"); 3767529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3777529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky // The idea here being that 'c' is basically just an integral 3787529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky // constant. 3797529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky StringRef Res = StringRef(TokStart,CurPtr - TokStart); 3807529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky long long Value; 3817529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3827529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (Res.startswith("\'\\")) { 3837529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky char theChar = Res[2]; 3847529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky switch (theChar) { 3857529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky default: Value = theChar; break; 3867529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case '\'': Value = '\''; break; 3877529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 't': Value = '\t'; break; 3887529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 'n': Value = '\n'; break; 3897529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 'b': Value = '\b'; break; 3907529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky } 3917529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky } else 3927529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky Value = TokStart[1]; 3937529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 394de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach return AsmToken(AsmToken::Integer, Res, Value); 3957529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky} 3967529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3977529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 39810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner/// LexQuote: String: "..." 399cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexQuote() { 40010a907d70fb54c40eecabb889e81c79b44092221Chris Lattner int CurChar = getNextChar(); 40110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner // TODO: does gas allow multiline string constants? 40210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner while (CurChar != '"') { 40310a907d70fb54c40eecabb889e81c79b44092221Chris Lattner if (CurChar == '\\') { 40410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner // Allow \", etc. 40510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner CurChar = getNextChar(); 40610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner } 407de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 40814ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner if (CurChar == EOF) 40914ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner return ReturnError(TokStart, "unterminated string constant"); 41010a907d70fb54c40eecabb889e81c79b44092221Chris Lattner 41110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner CurChar = getNextChar(); 41210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner } 413de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 4143f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 41510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner} 41610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner 417ff4bc460c52c1f285d8a56da173641bf92d49e3fChris LattnerStringRef AsmLexer::LexUntilEndOfStatement() { 418ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner TokStart = CurPtr; 419ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner 420d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach while (!isAtStartOfComment(*CurPtr) && // Start of line comment. 421d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach !isAtStatementSeparator(CurPtr) && // End of statement marker. 422cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines *CurPtr != '\n' && *CurPtr != '\r' && 423cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines (*CurPtr != 0 || CurPtr != CurBuf.end())) { 424ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner ++CurPtr; 4259823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby } 426ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner return StringRef(TokStart, CurPtr-TokStart); 427ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner} 4284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 429f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin EnderbyStringRef AsmLexer::LexUntilEndOfLine() { 430f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby TokStart = CurPtr; 431f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby 432cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines while (*CurPtr != '\n' && *CurPtr != '\r' && 433cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines (*CurPtr != 0 || CurPtr != CurBuf.end())) { 434f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby ++CurPtr; 435f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby } 436f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby return StringRef(TokStart, CurPtr-TokStart); 437f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby} 438f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby 43936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesconst AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { 44036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const char *SavedTokStart = TokStart; 44136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const char *SavedCurPtr = CurPtr; 44236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool SavedAtStartOfLine = isAtStartOfLine; 44336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool SavedSkipSpace = SkipSpace; 44436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 44536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::string SavedErr = getErr(); 44636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SMLoc SavedErrLoc = getErrLoc(); 44736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 44836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SkipSpace = ShouldSkipSpace; 44936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AsmToken Token = LexToken(); 45036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 45136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetError(SavedErrLoc, SavedErr); 45236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 45336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SkipSpace = SavedSkipSpace; 45436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines isAtStartOfLine = SavedAtStartOfLine; 45536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurPtr = SavedCurPtr; 45636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TokStart = SavedTokStart; 45736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 45836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return Token; 45936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 46036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 461b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderbybool AsmLexer::isAtStartOfComment(char Char) { 462cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner // FIXME: This won't work for multi-character comment indicators like "//". 463cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner return Char == *MAI.getCommentString(); 464b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby} 465b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby 466d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbachbool AsmLexer::isAtStatementSeparator(const char *Ptr) { 467d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return strncmp(Ptr, MAI.getSeparatorString(), 468d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach strlen(MAI.getSeparatorString())) == 0; 469d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach} 470d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach 471cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexToken() { 472a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner TokStart = CurPtr; 473a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // This always consumes at least one character. 474a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner int CurChar = getNextChar(); 475de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 476f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby if (isAtStartOfComment(CurChar)) { 477f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // If this comment starts with a '#', then return the Hash token and let 478f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // the assembler parser see if it can be parsed as a cpp line filename 479f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // comment. We do this only if we are at the start of a line. 480f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby if (CurChar == '#' && isAtStartOfLine) 481f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 482f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby isAtStartOfLine = true; 483b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby return LexLineComment(); 484f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby } 485d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach if (isAtStatementSeparator(TokStart)) { 486d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach CurPtr += strlen(MAI.getSeparatorString()) - 1; 487d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return AsmToken(AsmToken::EndOfStatement, 488d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach StringRef(TokStart, strlen(MAI.getSeparatorString()))); 489d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach } 4909823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby 49170796ca867132fd8c767301061afb9760cd69167Jim Grosbach // If we're missing a newline at EOF, make sure we still get an 49270796ca867132fd8c767301061afb9760cd69167Jim Grosbach // EndOfStatement token before the Eof token. 49370796ca867132fd8c767301061afb9760cd69167Jim Grosbach if (CurChar == EOF && !isAtStartOfLine) { 49470796ca867132fd8c767301061afb9760cd69167Jim Grosbach isAtStartOfLine = true; 49570796ca867132fd8c767301061afb9760cd69167Jim Grosbach return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 49670796ca867132fd8c767301061afb9760cd69167Jim Grosbach } 49770796ca867132fd8c767301061afb9760cd69167Jim Grosbach 49870796ca867132fd8c767301061afb9760cd69167Jim Grosbach isAtStartOfLine = false; 499a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner switch (CurChar) { 500a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner default: 5015fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 5025fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 5034651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexIdentifier(); 504de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 505a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // Unknown character, emit an error. 50627aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner return ReturnError(TokStart, "invalid character in input"); 5073f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 508a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case 0: 509a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case ' ': 510a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case '\t': 5117b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd if (SkipSpace) { 5127b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd // Ignore whitespace. 5137b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd return LexToken(); 5147b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } else { 5157b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd int len = 1; 5167b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd while (*CurPtr==' ' || *CurPtr=='\t') { 5177b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd CurPtr++; 5187b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd len++; 5197b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } 5207b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd return AsmToken(AsmToken::Space, StringRef(TokStart, len)); 5217b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } 5224651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '\n': // FALL THROUGH. 523d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach case '\r': 524f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby isAtStartOfLine = true; 525d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 5263f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 5273f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 5283f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 5293f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 5303f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 5313f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 532fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 533fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 534fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 535fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 5363f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 5373f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 5385fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 539924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 540653664471333f316020e96dd3d664f4984f66a65Rafael Espindola case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 541de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': 542475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '=') 5433f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 5443f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 545de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '|': 546475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '|') 5473f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 5483f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 5493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 550de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '&': 551475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '&') 5523f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 5533f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 554de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '!': 555475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '=') 5563f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 5573f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 5587b4608dfa018455021050ccd31d3c49aaecf7ff6Kevin Enderby case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 5594651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '/': return LexSlash(); 5609823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 5617529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case '\'': return LexSingleQuote(); 56210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner case '"': return LexQuote(); 5634651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '0': case '1': case '2': case '3': case '4': 5644651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '5': case '6': case '7': case '8': case '9': 5654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexDigit(); 5668dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner case '<': 567475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar switch (*CurPtr) { 568de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 569cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 570de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 571cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 572de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 573cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 5743f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 5758dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner } 5768dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner case '>': 577475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar switch (*CurPtr) { 578de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 579cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 580de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 581cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 5823f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 5838dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner } 584de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 5854651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // TODO: Quoted identifiers (objc methods etc) 5864651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // local labels: [0-9][:] 5874651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Forward/backward labels: [0-9][fb] 5884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Integers, fp constants, character constants. 589a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner } 59066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands} 591