1a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 3a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// The LLVM Compiler Infrastructure 4a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 5a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This file is distributed under the University of Illinois Open Source 6a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// License. See LICENSE.TXT for details. 7a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 8a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===// 9a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 10a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This class implements the lexer for assembly files. 11a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 12a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===// 13a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 14be343b3ca3f53d5d5e29f3591af8b9bb831daa98Chris Lattner#include "llvm/MC/MCParser/AsmLexer.h" 159823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby#include "llvm/MC/MCAsmInfo.h" 16d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/MemoryBuffer.h" 17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/SMLoc.h" 18476b242fe7a61e5f9ac6214b0bc5c680d24f152eNick Lewycky#include <cctype> 194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner#include <cerrno> 2066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands#include <cstdio> 214506bd2cfd3e75535670890031eec26e216993b8Chris Lattner#include <cstdlib> 22a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerusing namespace llvm; 23a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 24fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean CallananAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { 25fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurBuf = NULL; 26fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = NULL; 276148225b9590f18fcb6a1d3151d3158b316965e0Jim Grosbach isAtStartOfLine = true; 28faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner} 29faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner 30faf32c102db556e367af1e5bca7359160840d2d0Chris LattnerAsmLexer::~AsmLexer() { 31a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner} 32a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 33fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callananvoid AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { 34fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurBuf = buf; 35de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 36fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan if (ptr) 37fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = ptr; 38fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan else 39fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = CurBuf->getBufferStart(); 40de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 41fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan TokStart = 0; 42fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan} 43fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan 444651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// ReturnError - Set the error to the specified string at the specified 453f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar/// location. This is defined to always return AsmToken::Error. 46cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 4779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan SetError(SMLoc::getFromPointer(Loc), Msg); 48de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Error, StringRef(Loc, 0)); 504651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 52a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerint AsmLexer::getNextChar() { 53a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner char CurChar = *CurPtr++; 54a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner switch (CurChar) { 55a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner default: 56a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return (unsigned char)CurChar; 57fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan case 0: 58a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // A nul character in the stream is either the end of the current buffer or 59a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // a random nul in the file. Disambiguate that here. 60a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner if (CurPtr-1 != CurBuf->getBufferEnd()) 61a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return 0; // Just whitespace. 62de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 63a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // Otherwise, return end of file. 64de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach --CurPtr; // Another call to lex will return EOF again. 65a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return EOF; 66a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner } 67a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner} 68a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 694f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? 704f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// 714f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// The leading integral digit sequence and dot should have already been 724f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed, some or all of the fractional digit sequence *can* have been 734f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed. 744f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel DunbarAsmToken AsmLexer::LexFloatLiteral() { 754f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Skip the fractional digit sequence. 764f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar while (isdigit(*CurPtr)) 774f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 784f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 794f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Check for exponent; we intentionally accept a slighlty wider set of 804f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // literals here and rely on the upstream client to reject invalid ones (e.g., 814f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // "1e+"). 824f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == 'e' || *CurPtr == 'E') { 834f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 844f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == '-' || *CurPtr == '+') 854f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 864f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar while (isdigit(*CurPtr)) 874f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 884f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar } 894f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 904f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return AsmToken(AsmToken::Real, 914f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar StringRef(TokStart, CurPtr - TokStart)); 924f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar} 934f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 945fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 9554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbarstatic bool IsIdentifierChar(char c) { 9654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@'; 9754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar} 98cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexIdentifier() { 9954f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Check for floating point literals. 10054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { 1014f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Disambiguate a .1243foo identifier from a floating literal. 10254f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar while (isdigit(*CurPtr)) 10354f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar ++CurPtr; 1044f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) 1054f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return LexFloatLiteral(); 10654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar } 10754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar 10854f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar while (IsIdentifierChar(*CurPtr)) 1094651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 110de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 111d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner // Handle . as a special case. 1125fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar if (CurPtr == TokStart+1 && TokStart[0] == '.') 1135fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 114de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1153f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 1164651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1174651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 1184651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexSlash: Slash: / 1194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// C-Style Comment: /* ... */ 120cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexSlash() { 121383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar switch (*CurPtr) { 122383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar case '*': break; // C style comment. 123383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar case '/': return ++CurPtr, LexLineComment(); 124bdf90d679befafe70b93082042266ba58a9ad0b2Daniel Dunbar default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); 125383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar } 1264651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 1274651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // C Style comment. 1284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; // skip the star. 1294651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (1) { 1304651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner int CurChar = getNextChar(); 1314651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner switch (CurChar) { 1324651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case EOF: 13327aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner return ReturnError(TokStart, "unterminated comment"); 1344651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '*': 1354651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // End of the comment? 1364651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr[0] != '/') break; 137de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1384651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; // End the */. 1394651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexToken(); 1404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 1414651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 1424651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1434651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 144383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// LexLineComment: Comment: #[^\n]* 145383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// : //[^\n]* 146cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexLineComment() { 147cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar // FIXME: This is broken if we happen to a comment at the end of a file, which 148cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar // was .included, and which doesn't end with a newline. 1494651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner int CurChar = getNextChar(); 1500ecd825e54f2235c133b44c967a612551633106cChris Lattner while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 1514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner CurChar = getNextChar(); 152de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1534651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurChar == EOF) 1543f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); 1553f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); 1564651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1574651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 158a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattnerstatic void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 159ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach // Skip ULL, UL, U, L and LL suffices. 160ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach if (CurPtr[0] == 'U') 161ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach ++CurPtr; 162ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach if (CurPtr[0] == 'L') 163ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach ++CurPtr; 164ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach if (CurPtr[0] == 'L') 165ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach ++CurPtr; 166a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner} 167a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner 168d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier// Look ahead to search for first non-hex digit, if it's [hH], then we treat the 169d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier// integer as a hexadecimal, possibly with leading zeroes. 170d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosierstatic unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { 171d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier const char *FirstHex = 0; 172d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier const char *LookAhead = CurPtr; 173d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier while (1) { 174d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (isdigit(*LookAhead)) { 175d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++LookAhead; 176d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } else if (isxdigit(*LookAhead)) { 177d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (!FirstHex) 178d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier FirstHex = LookAhead; 179d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++LookAhead; 180d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } else { 181d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier break; 182d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } 183d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } 184d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; 185e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; 186d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (isHex) 187d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier return 16; 188d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier return DefaultRadix; 189d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier} 190d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 1914651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexDigit: First character is [0-9]. 1924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Local Label: [0-9][:] 193e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola/// Forward/Backward Label: [0-9][fb] 194e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola/// Binary integer: 0b[01]+ 1954651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Octal integer: 0[0-7]+ 196d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 1974651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Decimal integer: [1-9][0-9]* 198cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexDigit() { 1994651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Decimal integer: [1-9][0-9]* 200facb34b41cea284b5a0b4992ff619e5cfd5e6a22Daniel Dunbar if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 201d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier unsigned Radix = doLookAhead(CurPtr, 10); 202e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola bool isHex = Radix == 16; 20354f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Check for floating point literals. 204e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { 20554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar ++CurPtr; 2064f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return LexFloatLiteral(); 20754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar } 20854f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar 209d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner StringRef Result(TokStart, CurPtr - TokStart); 210a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner 211d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner long long Value; 212d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, Value)) { 2137ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner // Allow positive values that are too large to fit into a signed 64-bit 2147ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner // integer, but that do fit in an unsigned one, we just convert them over. 2157ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner unsigned long long UValue; 216d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, UValue)) 217e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return ReturnError(TokStart, !isHex ? "invalid decimal number" : 218d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier "invalid hexdecimal number"); 2197ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner Value = (long long)UValue; 2203a151be8d5f85ca12737f5664ef91192014a4f77Chris Lattner } 221de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 222d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier // Consume the [bB][hH]. 223d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Radix == 2 || Radix == 16) 224d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 225d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 226ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach // The darwin/x86 (and x86-64) assembler accepts and ignores type 227ac67b50fcfaab20829b4bce32cfdce77507f6c72Jim Grosbach // suffices on integer literals. 228a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 229de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 230d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner return AsmToken(AsmToken::Integer, Result, Value); 2314651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 232de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2334651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (*CurPtr == 'b') { 234e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola ++CurPtr; 235e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola // See if we actually have "0b" as part of something like "jmp 0b\n" 236e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (!isdigit(CurPtr[0])) { 237e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola --CurPtr; 238e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola StringRef Result(TokStart, CurPtr - TokStart); 239e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return AsmToken(AsmToken::Integer, Result, 0); 240e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola } 241e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola const char *NumStart = CurPtr; 2424651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (CurPtr[0] == '0' || CurPtr[0] == '1') 2434651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 244de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2454651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Requires at least one binary digit. 2464651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr == NumStart) 24705f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid binary number"); 248de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 249d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner StringRef Result(TokStart, CurPtr - TokStart); 250de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 251d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner long long Value; 252a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner if (Result.substr(2).getAsInteger(2, Value)) 25305f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid binary number"); 254de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 255a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 256a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 257a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 258de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 259d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner return AsmToken(AsmToken::Integer, Result, Value); 2604651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 261de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2624651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (*CurPtr == 'x') { 2634651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 2644651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner const char *NumStart = CurPtr; 2654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (isxdigit(CurPtr[0])) 2664651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 267de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2684651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Requires at least one hex digit. 2694651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr == NumStart) 27005f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(CurPtr-2, "invalid hexadecimal number"); 27103949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner 27203949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner unsigned long long Result; 27303949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 27405f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid hexadecimal number"); 275de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 276d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier // Consume the optional [hH]. 277d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (*CurPtr == 'h' || *CurPtr == 'H') 278d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 279d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 280a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 281a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 282a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 283de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2843f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), 28503949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner (int64_t)Result); 2864651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 287de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 28850e75bfc29269def44981ab5f109334d95f55007Matt Beaumont-Gay // Either octal or hexadecimal. 289d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner long long Value; 290d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier unsigned Radix = doLookAhead(CurPtr, 8); 291e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola bool isHex = Radix == 16; 292d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier StringRef Result(TokStart, CurPtr - TokStart); 293d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, Value)) 294e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return ReturnError(TokStart, !isHex ? "invalid octal number" : 29553e5bb70db34b736eed01c1580af1afd7314a2d8Chad Rosier "invalid hexdecimal number"); 296de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 297e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola // Consume the [hH]. 298e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (Radix == 16) 299d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 300d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 301a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 302a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 303a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 304de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 305d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner return AsmToken(AsmToken::Integer, Result, Value); 3064651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 3074651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 3087529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky/// LexSingleQuote: Integer: 'b' 3097529b164104f802b59fbf96f5a88d9709ac3ff1aRoman DivackyAsmToken AsmLexer::LexSingleQuote() { 3107529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky int CurChar = getNextChar(); 3117529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3127529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar == '\\') 3137529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky CurChar = getNextChar(); 3147529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3157529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar == EOF) 3167529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky return ReturnError(TokStart, "unterminated single quote"); 3177529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3187529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky CurChar = getNextChar(); 3197529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3207529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar != '\'') 3217529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky return ReturnError(TokStart, "single quote way too long"); 3227529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3237529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky // The idea here being that 'c' is basically just an integral 3247529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky // constant. 3257529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky StringRef Res = StringRef(TokStart,CurPtr - TokStart); 3267529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky long long Value; 3277529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3287529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (Res.startswith("\'\\")) { 3297529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky char theChar = Res[2]; 3307529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky switch (theChar) { 3317529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky default: Value = theChar; break; 3327529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case '\'': Value = '\''; break; 3337529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 't': Value = '\t'; break; 3347529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 'n': Value = '\n'; break; 3357529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 'b': Value = '\b'; break; 3367529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky } 3377529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky } else 3387529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky Value = TokStart[1]; 3397529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 340de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach return AsmToken(AsmToken::Integer, Res, Value); 3417529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky} 3427529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3437529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 34410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner/// LexQuote: String: "..." 345cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexQuote() { 34610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner int CurChar = getNextChar(); 34710a907d70fb54c40eecabb889e81c79b44092221Chris Lattner // TODO: does gas allow multiline string constants? 34810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner while (CurChar != '"') { 34910a907d70fb54c40eecabb889e81c79b44092221Chris Lattner if (CurChar == '\\') { 35010a907d70fb54c40eecabb889e81c79b44092221Chris Lattner // Allow \", etc. 35110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner CurChar = getNextChar(); 35210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner } 353de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 35414ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner if (CurChar == EOF) 35514ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner return ReturnError(TokStart, "unterminated string constant"); 35610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner 35710a907d70fb54c40eecabb889e81c79b44092221Chris Lattner CurChar = getNextChar(); 35810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner } 359de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 3603f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 36110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner} 36210a907d70fb54c40eecabb889e81c79b44092221Chris Lattner 363ff4bc460c52c1f285d8a56da173641bf92d49e3fChris LattnerStringRef AsmLexer::LexUntilEndOfStatement() { 364ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner TokStart = CurPtr; 365ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner 366d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach while (!isAtStartOfComment(*CurPtr) && // Start of line comment. 367d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach !isAtStatementSeparator(CurPtr) && // End of statement marker. 368ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner *CurPtr != '\n' && 369ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner *CurPtr != '\r' && 3709823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 371ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner ++CurPtr; 3729823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby } 373ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner return StringRef(TokStart, CurPtr-TokStart); 374ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner} 3754651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 376f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin EnderbyStringRef AsmLexer::LexUntilEndOfLine() { 377f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby TokStart = CurPtr; 378f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby 379f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby while (*CurPtr != '\n' && 380f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby *CurPtr != '\r' && 381f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 382f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby ++CurPtr; 383f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby } 384f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby return StringRef(TokStart, CurPtr-TokStart); 385f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby} 386f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby 387b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderbybool AsmLexer::isAtStartOfComment(char Char) { 388cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner // FIXME: This won't work for multi-character comment indicators like "//". 389cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner return Char == *MAI.getCommentString(); 390b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby} 391b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby 392d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbachbool AsmLexer::isAtStatementSeparator(const char *Ptr) { 393d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return strncmp(Ptr, MAI.getSeparatorString(), 394d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach strlen(MAI.getSeparatorString())) == 0; 395d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach} 396d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach 397cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexToken() { 398a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner TokStart = CurPtr; 399a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // This always consumes at least one character. 400a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner int CurChar = getNextChar(); 401de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 402f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby if (isAtStartOfComment(CurChar)) { 403f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // If this comment starts with a '#', then return the Hash token and let 404f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // the assembler parser see if it can be parsed as a cpp line filename 405f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // comment. We do this only if we are at the start of a line. 406f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby if (CurChar == '#' && isAtStartOfLine) 407f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 408f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby isAtStartOfLine = true; 409b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby return LexLineComment(); 410f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby } 411d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach if (isAtStatementSeparator(TokStart)) { 412d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach CurPtr += strlen(MAI.getSeparatorString()) - 1; 413d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return AsmToken(AsmToken::EndOfStatement, 414d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach StringRef(TokStart, strlen(MAI.getSeparatorString()))); 415d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach } 4169823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby 41770796ca867132fd8c767301061afb9760cd69167Jim Grosbach // If we're missing a newline at EOF, make sure we still get an 41870796ca867132fd8c767301061afb9760cd69167Jim Grosbach // EndOfStatement token before the Eof token. 41970796ca867132fd8c767301061afb9760cd69167Jim Grosbach if (CurChar == EOF && !isAtStartOfLine) { 42070796ca867132fd8c767301061afb9760cd69167Jim Grosbach isAtStartOfLine = true; 42170796ca867132fd8c767301061afb9760cd69167Jim Grosbach return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 42270796ca867132fd8c767301061afb9760cd69167Jim Grosbach } 42370796ca867132fd8c767301061afb9760cd69167Jim Grosbach 42470796ca867132fd8c767301061afb9760cd69167Jim Grosbach isAtStartOfLine = false; 425a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner switch (CurChar) { 426a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner default: 4275fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 4285fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 4294651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexIdentifier(); 430de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 431a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // Unknown character, emit an error. 43227aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner return ReturnError(TokStart, "invalid character in input"); 4333f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 434a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case 0: 435a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case ' ': 436a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case '\t': 4377b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd if (SkipSpace) { 4387b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd // Ignore whitespace. 4397b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd return LexToken(); 4407b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } else { 4417b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd int len = 1; 4427b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd while (*CurPtr==' ' || *CurPtr=='\t') { 4437b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd CurPtr++; 4447b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd len++; 4457b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } 4467b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd return AsmToken(AsmToken::Space, StringRef(TokStart, len)); 4477b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } 4484651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '\n': // FALL THROUGH. 449d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach case '\r': 450f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby isAtStartOfLine = true; 451d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 4523f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 4533f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 4543f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 4553f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 4563f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 4573f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 458fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 459fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 460fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 461fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 4623f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 4633f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 4645fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 465924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 466653664471333f316020e96dd3d664f4984f66a65Rafael Espindola case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 467de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': 468475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '=') 4693f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 4703f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 471de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '|': 472475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '|') 4733f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 4743f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 4753f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 476de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '&': 477475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '&') 4783f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 4793f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 480de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '!': 481475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '=') 4823f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 4833f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 4847b4608dfa018455021050ccd31d3c49aaecf7ff6Kevin Enderby case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 4854651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '/': return LexSlash(); 4869823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 4877529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case '\'': return LexSingleQuote(); 48810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner case '"': return LexQuote(); 4894651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '0': case '1': case '2': case '3': case '4': 4904651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '5': case '6': case '7': case '8': case '9': 4914651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexDigit(); 4928dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner case '<': 493475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar switch (*CurPtr) { 494de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 495cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 496de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 497cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 498de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 499cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 5003f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 5018dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner } 5028dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner case '>': 503475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar switch (*CurPtr) { 504de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 505cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 506de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 507cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 5083f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 5098dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner } 510de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 5114651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // TODO: Quoted identifiers (objc methods etc) 5124651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // local labels: [0-9][:] 5134651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Forward/backward labels: [0-9][fb] 5144651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Integers, fp constants, character constants. 515a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner } 51666b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands} 517