AsmLexer.cpp revision 50e75bfc29269def44981ab5f109334d95f55007
1a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 3a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// The LLVM Compiler Infrastructure 4a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 5a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This file is distributed under the University of Illinois Open Source 6a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// License. See LICENSE.TXT for details. 7a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 8a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===// 9a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 10a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// This class implements the lexer for assembly files. 11a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner// 12a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner//===----------------------------------------------------------------------===// 13a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 14be343b3ca3f53d5d5e29f3591af8b9bb831daa98Chris Lattner#include "llvm/MC/MCParser/AsmLexer.h" 159823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby#include "llvm/MC/MCAsmInfo.h" 16d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/MemoryBuffer.h" 17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/SMLoc.h" 18476b242fe7a61e5f9ac6214b0bc5c680d24f152eNick Lewycky#include <cctype> 194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner#include <cerrno> 2066b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands#include <cstdio> 214506bd2cfd3e75535670890031eec26e216993b8Chris Lattner#include <cstdlib> 22a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerusing namespace llvm; 23a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 24fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean CallananAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { 25fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurBuf = NULL; 26fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = NULL; 276148225b9590f18fcb6a1d3151d3158b316965e0Jim Grosbach isAtStartOfLine = true; 28faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner} 29faf32c102db556e367af1e5bca7359160840d2d0Chris Lattner 30faf32c102db556e367af1e5bca7359160840d2d0Chris LattnerAsmLexer::~AsmLexer() { 31a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner} 32a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 33fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callananvoid AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { 34fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurBuf = buf; 35de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 36fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan if (ptr) 37fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = ptr; 38fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan else 39fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan CurPtr = CurBuf->getBufferStart(); 40de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 41fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan TokStart = 0; 42fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan} 43fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan 444651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// ReturnError - Set the error to the specified string at the specified 453f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar/// location. This is defined to always return AsmToken::Error. 46cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 4779036e421f22cf3f661386c560fda36aa5bd04ccSean Callanan SetError(SMLoc::getFromPointer(Loc), Msg); 48de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Error, StringRef(Loc, 0)); 504651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 52a59e8779964992457ada1af6a5f48068523cfd42Chris Lattnerint AsmLexer::getNextChar() { 53a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner char CurChar = *CurPtr++; 54a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner switch (CurChar) { 55a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner default: 56a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return (unsigned char)CurChar; 57fd0b0288e2ee5ccf3f1d47090542710c67a77cf7Sean Callanan case 0: 58a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // A nul character in the stream is either the end of the current buffer or 59a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // a random nul in the file. Disambiguate that here. 60a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner if (CurPtr-1 != CurBuf->getBufferEnd()) 61a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return 0; // Just whitespace. 62de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 63a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // Otherwise, return end of file. 64de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach --CurPtr; // Another call to lex will return EOF again. 65a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner return EOF; 66a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner } 67a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner} 68a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner 694f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? 704f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// 714f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// The leading integral digit sequence and dot should have already been 724f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed, some or all of the fractional digit sequence *can* have been 734f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar/// consumed. 744f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel DunbarAsmToken AsmLexer::LexFloatLiteral() { 754f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Skip the fractional digit sequence. 764f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar while (isdigit(*CurPtr)) 774f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 784f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 794f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Check for exponent; we intentionally accept a slighlty wider set of 804f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // literals here and rely on the upstream client to reject invalid ones (e.g., 814f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // "1e+"). 824f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == 'e' || *CurPtr == 'E') { 834f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 844f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == '-' || *CurPtr == '+') 854f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 864f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar while (isdigit(*CurPtr)) 874f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar ++CurPtr; 884f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar } 894f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 904f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return AsmToken(AsmToken::Real, 914f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar StringRef(TokStart, CurPtr - TokStart)); 924f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar} 934f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar 945fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 9554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbarstatic bool IsIdentifierChar(char c) { 9654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@'; 9754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar} 98cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexIdentifier() { 9954f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Check for floating point literals. 10054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { 1014f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar // Disambiguate a .1243foo identifier from a floating literal. 10254f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar while (isdigit(*CurPtr)) 10354f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar ++CurPtr; 1044f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) 1054f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return LexFloatLiteral(); 10654f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar } 10754f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar 10854f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar while (IsIdentifierChar(*CurPtr)) 1094651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 110de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 111d305035155ef3d138e102434bf5a733ea2e32405Chris Lattner // Handle . as a special case. 1125fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar if (CurPtr == TokStart+1 && TokStart[0] == '.') 1135fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 114de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1153f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 1164651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1174651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 1184651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexSlash: Slash: / 1194651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// C-Style Comment: /* ... */ 120cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexSlash() { 121383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar switch (*CurPtr) { 122383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar case '*': break; // C style comment. 123383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar case '/': return ++CurPtr, LexLineComment(); 124bdf90d679befafe70b93082042266ba58a9ad0b2Daniel Dunbar default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); 125383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar } 1264651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 1274651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // C Style comment. 1284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; // skip the star. 1294651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (1) { 1304651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner int CurChar = getNextChar(); 1314651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner switch (CurChar) { 1324651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case EOF: 13327aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner return ReturnError(TokStart, "unterminated comment"); 1344651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '*': 1354651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // End of the comment? 1364651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr[0] != '/') break; 137de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1384651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; // End the */. 1394651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexToken(); 1404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 1414651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 1424651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1434651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 144383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// LexLineComment: Comment: #[^\n]* 145383a4a8db04456b8e5a59a35b3f967c4aa9c90cfDaniel Dunbar/// : //[^\n]* 146cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexLineComment() { 147cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar // FIXME: This is broken if we happen to a comment at the end of a file, which 148cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar // was .included, and which doesn't end with a newline. 1494651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner int CurChar = getNextChar(); 1500ecd825e54f2235c133b44c967a612551633106cChris Lattner while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 1514651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner CurChar = getNextChar(); 152de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 1534651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurChar == EOF) 1543f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); 1553f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); 1564651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 1574651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 158a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattnerstatic void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 159a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner if (CurPtr[0] == 'L' && CurPtr[1] == 'L') 160a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner CurPtr += 2; 161a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L') 162a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner CurPtr += 3; 163a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner} 164a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner 165d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier// Look ahead to search for first non-hex digit, if it's [hH], then we treat the 166d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier// integer as a hexadecimal, possibly with leading zeroes. 167d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosierstatic unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { 168d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier const char *FirstHex = 0; 169d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier const char *LookAhead = CurPtr; 170d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier while (1) { 171d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (isdigit(*LookAhead)) { 172d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++LookAhead; 173d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } else if (isxdigit(*LookAhead)) { 174d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (!FirstHex) 175d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier FirstHex = LookAhead; 176d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++LookAhead; 177d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } else { 178d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier break; 179d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } 180d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier } 181d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; 182e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; 183d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (isHex) 184d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier return 16; 185d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier return DefaultRadix; 186d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier} 187d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 1884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// LexDigit: First character is [0-9]. 1894651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Local Label: [0-9][:] 190e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola/// Forward/Backward Label: [0-9][fb] 191e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola/// Binary integer: 0b[01]+ 1924651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Octal integer: 0[0-7]+ 193d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 1944651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner/// Decimal integer: [1-9][0-9]* 195cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexDigit() { 1964651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Decimal integer: [1-9][0-9]* 197facb34b41cea284b5a0b4992ff619e5cfd5e6a22Daniel Dunbar if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 198d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier unsigned Radix = doLookAhead(CurPtr, 10); 199e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola bool isHex = Radix == 16; 20054f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar // Check for floating point literals. 201e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { 20254f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar ++CurPtr; 2034f2afe3d399eb9485dc605f3cc3a6382dcae757aDaniel Dunbar return LexFloatLiteral(); 20454f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar } 20554f0a625b0eb9afeece652a8462755010d237c78Daniel Dunbar 206d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner StringRef Result(TokStart, CurPtr - TokStart); 207a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner 208d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner long long Value; 209d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, Value)) { 2107ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner // Allow positive values that are too large to fit into a signed 64-bit 2117ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner // integer, but that do fit in an unsigned one, we just convert them over. 2127ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner unsigned long long UValue; 213d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, UValue)) 214e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return ReturnError(TokStart, !isHex ? "invalid decimal number" : 215d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier "invalid hexdecimal number"); 2167ab3cc32d6bd3c3166184e27713c91f5317c7f85Chris Lattner Value = (long long)UValue; 2173a151be8d5f85ca12737f5664ef91192014a4f77Chris Lattner } 218de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 219d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier // Consume the [bB][hH]. 220d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Radix == 2 || Radix == 16) 221d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 222d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 223a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 224a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 225a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 226de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 227d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner return AsmToken(AsmToken::Integer, Result, Value); 2284651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 229de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2304651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (*CurPtr == 'b') { 231e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola ++CurPtr; 232e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola // See if we actually have "0b" as part of something like "jmp 0b\n" 233e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (!isdigit(CurPtr[0])) { 234e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola --CurPtr; 235e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola StringRef Result(TokStart, CurPtr - TokStart); 236e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return AsmToken(AsmToken::Integer, Result, 0); 237e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola } 238e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola const char *NumStart = CurPtr; 2394651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (CurPtr[0] == '0' || CurPtr[0] == '1') 2404651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 241de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2424651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Requires at least one binary digit. 2434651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr == NumStart) 24405f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid binary number"); 245de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 246d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner StringRef Result(TokStart, CurPtr - TokStart); 247de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 248d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner long long Value; 249a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner if (Result.substr(2).getAsInteger(2, Value)) 25005f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid binary number"); 251de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 252a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 253a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 254a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 255de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 256d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner return AsmToken(AsmToken::Integer, Result, Value); 2574651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 258de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2594651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (*CurPtr == 'x') { 2604651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 2614651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner const char *NumStart = CurPtr; 2624651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner while (isxdigit(CurPtr[0])) 2634651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner ++CurPtr; 264de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2654651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Requires at least one hex digit. 2664651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner if (CurPtr == NumStart) 26705f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(CurPtr-2, "invalid hexadecimal number"); 26803949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner 26903949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner unsigned long long Result; 27003949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 27105f9e4e8bd2347826c50ec391ea4ec8caffe45efEric Christopher return ReturnError(TokStart, "invalid hexadecimal number"); 272de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 273d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier // Consume the optional [hH]. 274d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (*CurPtr == 'h' || *CurPtr == 'H') 275d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 276d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 277a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 278a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 279a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 280de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 2813f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), 28203949c9db3e8f02d6ec7cd53782507e3a4e9e4fcChris Lattner (int64_t)Result); 2834651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner } 284de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 28550e75bfc29269def44981ab5f109334d95f55007Matt Beaumont-Gay // Either octal or hexadecimal. 286d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner long long Value; 287d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier unsigned Radix = doLookAhead(CurPtr, 8); 288e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola bool isHex = Radix == 16; 289d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier StringRef Result(TokStart, CurPtr - TokStart); 290d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier if (Result.getAsInteger(Radix, Value)) 291e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola return ReturnError(TokStart, !isHex ? "invalid octal number" : 29253e5bb70db34b736eed01c1580af1afd7314a2d8Chad Rosier "invalid hexdecimal number"); 293de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 294e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola // Consume the [hH]. 295e186d7191c2cf95753a9790b1490df8a07416daaRafael Espindola if (Radix == 16) 296d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier ++CurPtr; 297d556fd129026f6e3fa6ea9c2c70ba489bff18954Chad Rosier 298a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 299a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner // suffixes on integer literals. 300a78c67e9bbf6ff0253945f3ba5bc178ece76d886Chris Lattner SkipIgnoredIntegerSuffix(CurPtr); 301de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 302d5a7e357a22b2e916a5b5e9d200ad284b95e35f1Chris Lattner return AsmToken(AsmToken::Integer, Result, Value); 3034651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner} 3044651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 3057529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky/// LexSingleQuote: Integer: 'b' 3067529b164104f802b59fbf96f5a88d9709ac3ff1aRoman DivackyAsmToken AsmLexer::LexSingleQuote() { 3077529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky int CurChar = getNextChar(); 3087529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3097529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar == '\\') 3107529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky CurChar = getNextChar(); 3117529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3127529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar == EOF) 3137529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky return ReturnError(TokStart, "unterminated single quote"); 3147529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3157529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky CurChar = getNextChar(); 3167529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3177529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (CurChar != '\'') 3187529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky return ReturnError(TokStart, "single quote way too long"); 3197529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3207529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky // The idea here being that 'c' is basically just an integral 3217529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky // constant. 3227529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky StringRef Res = StringRef(TokStart,CurPtr - TokStart); 3237529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky long long Value; 3247529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3257529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky if (Res.startswith("\'\\")) { 3267529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky char theChar = Res[2]; 3277529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky switch (theChar) { 3287529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky default: Value = theChar; break; 3297529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case '\'': Value = '\''; break; 3307529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 't': Value = '\t'; break; 3317529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 'n': Value = '\n'; break; 3327529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case 'b': Value = '\b'; break; 3337529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky } 3347529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky } else 3357529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky Value = TokStart[1]; 3367529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 337de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach return AsmToken(AsmToken::Integer, Res, Value); 3387529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky} 3397529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 3407529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky 34110a907d70fb54c40eecabb889e81c79b44092221Chris Lattner/// LexQuote: String: "..." 342cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexQuote() { 34310a907d70fb54c40eecabb889e81c79b44092221Chris Lattner int CurChar = getNextChar(); 34410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner // TODO: does gas allow multiline string constants? 34510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner while (CurChar != '"') { 34610a907d70fb54c40eecabb889e81c79b44092221Chris Lattner if (CurChar == '\\') { 34710a907d70fb54c40eecabb889e81c79b44092221Chris Lattner // Allow \", etc. 34810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner CurChar = getNextChar(); 34910a907d70fb54c40eecabb889e81c79b44092221Chris Lattner } 350de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 35114ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner if (CurChar == EOF) 35214ee48a5bae352780b767a14bd97e8e91800a95bChris Lattner return ReturnError(TokStart, "unterminated string constant"); 35310a907d70fb54c40eecabb889e81c79b44092221Chris Lattner 35410a907d70fb54c40eecabb889e81c79b44092221Chris Lattner CurChar = getNextChar(); 35510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner } 356de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 3573f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 35810a907d70fb54c40eecabb889e81c79b44092221Chris Lattner} 35910a907d70fb54c40eecabb889e81c79b44092221Chris Lattner 360ff4bc460c52c1f285d8a56da173641bf92d49e3fChris LattnerStringRef AsmLexer::LexUntilEndOfStatement() { 361ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner TokStart = CurPtr; 362ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner 363d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach while (!isAtStartOfComment(*CurPtr) && // Start of line comment. 364d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach !isAtStatementSeparator(CurPtr) && // End of statement marker. 365ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner *CurPtr != '\n' && 366ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner *CurPtr != '\r' && 3679823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 368ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner ++CurPtr; 3699823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby } 370ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner return StringRef(TokStart, CurPtr-TokStart); 371ff4bc460c52c1f285d8a56da173641bf92d49e3fChris Lattner} 3724651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner 373f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin EnderbyStringRef AsmLexer::LexUntilEndOfLine() { 374f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby TokStart = CurPtr; 375f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby 376f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby while (*CurPtr != '\n' && 377f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby *CurPtr != '\r' && 378f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 379f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby ++CurPtr; 380f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby } 381f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby return StringRef(TokStart, CurPtr-TokStart); 382f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby} 383f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby 384b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderbybool AsmLexer::isAtStartOfComment(char Char) { 385cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner // FIXME: This won't work for multi-character comment indicators like "//". 386cec545097cf1a81c0936cf62e39a81940d9f720fChris Lattner return Char == *MAI.getCommentString(); 387b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby} 388b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby 389d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbachbool AsmLexer::isAtStatementSeparator(const char *Ptr) { 390d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return strncmp(Ptr, MAI.getSeparatorString(), 391d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach strlen(MAI.getSeparatorString())) == 0; 392d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach} 393d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach 394cb358b63a742c72f440ea008f14d65e049e725adDaniel DunbarAsmToken AsmLexer::LexToken() { 395a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner TokStart = CurPtr; 396a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // This always consumes at least one character. 397a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner int CurChar = getNextChar(); 398de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 399f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby if (isAtStartOfComment(CurChar)) { 400f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // If this comment starts with a '#', then return the Hash token and let 401f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // the assembler parser see if it can be parsed as a cpp line filename 402f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby // comment. We do this only if we are at the start of a line. 403f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby if (CurChar == '#' && isAtStartOfLine) 404f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 405f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby isAtStartOfLine = true; 406b5db83084073d74501f3e2a7684d28240a047b92Kevin Enderby return LexLineComment(); 407f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby } 408d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach if (isAtStatementSeparator(TokStart)) { 409d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach CurPtr += strlen(MAI.getSeparatorString()) - 1; 410d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return AsmToken(AsmToken::EndOfStatement, 411d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach StringRef(TokStart, strlen(MAI.getSeparatorString()))); 412d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach } 4139823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby 41470796ca867132fd8c767301061afb9760cd69167Jim Grosbach // If we're missing a newline at EOF, make sure we still get an 41570796ca867132fd8c767301061afb9760cd69167Jim Grosbach // EndOfStatement token before the Eof token. 41670796ca867132fd8c767301061afb9760cd69167Jim Grosbach if (CurChar == EOF && !isAtStartOfLine) { 41770796ca867132fd8c767301061afb9760cd69167Jim Grosbach isAtStartOfLine = true; 41870796ca867132fd8c767301061afb9760cd69167Jim Grosbach return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 41970796ca867132fd8c767301061afb9760cd69167Jim Grosbach } 42070796ca867132fd8c767301061afb9760cd69167Jim Grosbach 42170796ca867132fd8c767301061afb9760cd69167Jim Grosbach isAtStartOfLine = false; 422a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner switch (CurChar) { 423a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner default: 4245fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 4255fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 4264651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexIdentifier(); 427de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 428a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner // Unknown character, emit an error. 42927aa7d259b416a9d1bf837ed2c3c11463367b11cChris Lattner return ReturnError(TokStart, "invalid character in input"); 4303f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 431a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case 0: 432a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case ' ': 433a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner case '\t': 4347b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd if (SkipSpace) { 4357b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd // Ignore whitespace. 4367b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd return LexToken(); 4377b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } else { 4387b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd int len = 1; 4397b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd while (*CurPtr==' ' || *CurPtr=='\t') { 4407b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd CurPtr++; 4417b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd len++; 4427b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } 4437b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd return AsmToken(AsmToken::Space, StringRef(TokStart, len)); 4447b6f2034ac355bd3b3cc88960bf8d0e694fe3db4Preston Gurd } 4454651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '\n': // FALL THROUGH. 446d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach case '\r': 447f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71Kevin Enderby isAtStartOfLine = true; 448d31d304f83f9c8df6870057509414b8d004bc8daJim Grosbach return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 4493f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 4503f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 4513f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 4523f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 4533f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 4543f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 455fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 456fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 457fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 458fb0f0dedd74dab13737a77671a724ee88465f5daKevin Enderby case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 4593f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 4603f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 4615fe03c023cd47a718759a3a4dc1d8e33297ae0fcDaniel Dunbar case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 462924c5e58f2a7c89019000e7dee3391dcebdf8a21Matt Fleming case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 463653664471333f316020e96dd3d664f4984f66a65Rafael Espindola case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 464de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': 465475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '=') 4663f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 4673f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 468de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '|': 469475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '|') 4703f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 4713f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 4723f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 473de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '&': 474475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '&') 4753f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 4763f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 477de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '!': 478475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar if (*CurPtr == '=') 4793f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 4803f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 4817b4608dfa018455021050ccd31d3c49aaecf7ff6Kevin Enderby case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 4824651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '/': return LexSlash(); 4839823ca971d5cb475401e59fde244caf5087c74a1Kevin Enderby case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 4847529b164104f802b59fbf96f5a88d9709ac3ff1aRoman Divacky case '\'': return LexSingleQuote(); 48510a907d70fb54c40eecabb889e81c79b44092221Chris Lattner case '"': return LexQuote(); 4864651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '0': case '1': case '2': case '3': case '4': 4874651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner case '5': case '6': case '7': case '8': case '9': 4884651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner return LexDigit(); 4898dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner case '<': 490475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar switch (*CurPtr) { 491de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 492cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 493de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 494cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 495de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 496cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 4973f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 4988dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner } 4998dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner case '>': 500475839e9a97a0c0282e107d14fd1dc6e5f223435Daniel Dunbar switch (*CurPtr) { 501de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 502cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 503de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 504cb358b63a742c72f440ea008f14d65e049e725adDaniel Dunbar StringRef(TokStart, 2)); 5053f87233d700eb4316cfaad59477834d2f5a2503bDaniel Dunbar default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 5068dfbe6c853e3e48b6e7b5957a4e028835ffe4400Chris Lattner } 507de2f5f423bc87f6f1ee0d02862b00cc32940a93aJim Grosbach 5084651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // TODO: Quoted identifiers (objc methods etc) 5094651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // local labels: [0-9][:] 5104651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Forward/backward labels: [0-9][fb] 5114651bca31bdad27184fa0d36640bf5ef1d83cf5cChris Lattner // Integers, fp constants, character constants. 512a59e8779964992457ada1af6a5f48068523cfd42Chris Lattner } 51366b9f29df22a84f25007c1a84b4d0bb8060552b9Duncan Sands} 514