LLLexer.cpp revision d343c6b70ec03b357d42e47ce7c00b3c3cb78efd
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "ParserInternals.h" 16#include "llvm/Support/MemoryBuffer.h" 17 18#include <list> 19#include "llvmAsmParser.h" 20using namespace llvm; 21 22//===----------------------------------------------------------------------===// 23// Helper functions. 24//===----------------------------------------------------------------------===// 25 26// atoull - Convert an ascii string of decimal digits into the unsigned long 27// long representation... this does not have to do input error checking, 28// because we know that the input will be matched by a suitable regex... 29// 30static uint64_t atoull(const char *Buffer, const char *End) { 31 uint64_t Result = 0; 32 for (; Buffer != End; Buffer++) { 33 uint64_t OldRes = Result; 34 Result *= 10; 35 Result += *Buffer-'0'; 36 if (Result < OldRes) { // Uh, oh, overflow detected!!! 37 GenerateError("constant bigger than 64 bits detected!"); 38 return 0; 39 } 40 } 41 return Result; 42} 43 44static uint64_t HexIntToVal(const char *Buffer, const char *End) { 45 uint64_t Result = 0; 46 for (; Buffer != End; ++Buffer) { 47 uint64_t OldRes = Result; 48 Result *= 16; 49 char C = *Buffer; 50 if (C >= '0' && C <= '9') 51 Result += C-'0'; 52 else if (C >= 'A' && C <= 'F') 53 Result += C-'A'+10; 54 else if (C >= 'a' && C <= 'f') 55 Result += C-'a'+10; 56 57 if (Result < OldRes) { // Uh, oh, overflow detected!!! 58 GenerateError("constant bigger than 64 bits detected!"); 59 return 0; 60 } 61 } 62 return Result; 63} 64 65// HexToFP - Convert the ascii string in hexadecimal format to the floating 66// point representation of it. 67// 68static double HexToFP(const char *Buffer, const char *End) { 69 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double 70} 71 72static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ 73 Pair[0] = 0; 74 for (int i=0; i<16; i++, Buffer++) { 75 assert(Buffer != End); 76 Pair[0] *= 16; 77 char C = *Buffer; 78 if (C >= '0' && C <= '9') 79 Pair[0] += C-'0'; 80 else if (C >= 'A' && C <= 'F') 81 Pair[0] += C-'A'+10; 82 else if (C >= 'a' && C <= 'f') 83 Pair[0] += C-'a'+10; 84 } 85 Pair[1] = 0; 86 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 87 Pair[1] *= 16; 88 char C = *Buffer; 89 if (C >= '0' && C <= '9') 90 Pair[1] += C-'0'; 91 else if (C >= 'A' && C <= 'F') 92 Pair[1] += C-'A'+10; 93 else if (C >= 'a' && C <= 'f') 94 Pair[1] += C-'a'+10; 95 } 96 if (Buffer != End) 97 GenerateError("constant bigger than 128 bits detected!"); 98} 99 100// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 101// appropriate character. 102static void UnEscapeLexed(std::string &Str) { 103 if (Str.empty()) return; 104 105 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 106 char *BOut = Buffer; 107 for (char *BIn = Buffer; BIn != EndBuffer; ) { 108 if (BIn[0] == '\\') { 109 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 110 *BOut++ = '\\'; // Two \ becomes one 111 BIn += 2; 112 } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { 113 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string 114 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number 115 BIn[3] = Tmp; // Restore character 116 BIn += 3; // Skip over handled chars 117 ++BOut; 118 } else { 119 *BOut++ = *BIn++; 120 } 121 } else { 122 *BOut++ = *BIn++; 123 } 124 } 125 Str.resize(BOut-Buffer); 126} 127 128/// isLabelChar - Return true for [-a-zA-Z$._0-9]. 129static bool isLabelChar(char C) { 130 return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; 131} 132 133 134/// isLabelTail - Return true if this pointer points to a valid end of a label. 135static const char *isLabelTail(const char *CurPtr) { 136 while (1) { 137 if (CurPtr[0] == ':') return CurPtr+1; 138 if (!isLabelChar(CurPtr[0])) return 0; 139 ++CurPtr; 140 } 141} 142 143 144 145//===----------------------------------------------------------------------===// 146// Lexer definition. 147//===----------------------------------------------------------------------===// 148 149// FIXME: REMOVE THIS. 150#define YYEOF 0 151#define YYERROR -2 152 153LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { 154 CurPtr = CurBuf->getBufferStart(); 155} 156 157std::string LLLexer::getFilename() const { 158 return CurBuf->getBufferIdentifier(); 159} 160 161int LLLexer::getNextChar() { 162 char CurChar = *CurPtr++; 163 switch (CurChar) { 164 default: return (unsigned char)CurChar; 165 case 0: 166 // A nul character in the stream is either the end of the current buffer or 167 // a random nul in the file. Disambiguate that here. 168 if (CurPtr-1 != CurBuf->getBufferEnd()) 169 return 0; // Just whitespace. 170 171 // Otherwise, return end of file. 172 --CurPtr; // Another call to lex will return EOF again. 173 return EOF; 174 case '\n': 175 case '\r': 176 // Handle the newline character by ignoring it and incrementing the line 177 // count. However, be careful about 'dos style' files with \n\r in them. 178 // Only treat a \n\r or \r\n as a single line. 179 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 180 *CurPtr != CurChar) 181 ++CurPtr; // Eat the two char newline sequence. 182 183 ++CurLineNo; 184 return '\n'; 185 } 186} 187 188 189int LLLexer::LexToken() { 190 TokStart = CurPtr; 191 192 int CurChar = getNextChar(); 193 194 switch (CurChar) { 195 default: 196 // Handle letters: [a-zA-Z_] 197 if (isalpha(CurChar) || CurChar == '_') 198 return LexIdentifier(); 199 200 return CurChar; 201 case EOF: return YYEOF; 202 case 0: 203 case ' ': 204 case '\t': 205 case '\n': 206 case '\r': 207 // Ignore whitespace. 208 return LexToken(); 209 case '+': return LexPositive(); 210 case '@': return LexAt(); 211 case '%': return LexPercent(); 212 case '"': return LexQuote(); 213 case '.': 214 if (const char *Ptr = isLabelTail(CurPtr)) { 215 CurPtr = Ptr; 216 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 217 return LABELSTR; 218 } 219 if (CurPtr[0] == '.' && CurPtr[1] == '.') { 220 CurPtr += 2; 221 return DOTDOTDOT; 222 } 223 return '.'; 224 case '$': 225 if (const char *Ptr = isLabelTail(CurPtr)) { 226 CurPtr = Ptr; 227 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 228 return LABELSTR; 229 } 230 return '$'; 231 case ';': 232 SkipLineComment(); 233 return LexToken(); 234 case '0': case '1': case '2': case '3': case '4': 235 case '5': case '6': case '7': case '8': case '9': 236 case '-': 237 return LexDigitOrNegative(); 238 } 239} 240 241void LLLexer::SkipLineComment() { 242 while (1) { 243 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 244 return; 245 } 246} 247 248/// LexAt - Lex all tokens that start with an @ character: 249/// AtStringConstant @\"[^\"]*\" 250/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* 251/// GlobalVarID @[0-9]+ 252int LLLexer::LexAt() { 253 // Handle AtStringConstant: @\"[^\"]*\" 254 if (CurPtr[0] == '"') { 255 ++CurPtr; 256 257 while (1) { 258 int CurChar = getNextChar(); 259 260 if (CurChar == EOF) { 261 GenerateError("End of file in global variable name"); 262 return YYERROR; 263 } 264 if (CurChar == '"') { 265 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 266 UnEscapeLexed(*llvmAsmlval.StrVal); 267 return ATSTRINGCONSTANT; 268 } 269 } 270 } 271 272 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 273 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 274 CurPtr[0] == '.' || CurPtr[0] == '_') { 275 ++CurPtr; 276 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 277 CurPtr[0] == '.' || CurPtr[0] == '_') 278 ++CurPtr; 279 280 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ 281 return GLOBALVAR; 282 } 283 284 // Handle GlobalVarID: @[0-9]+ 285 if (isdigit(CurPtr[0])) { 286 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); 287 288 uint64_t Val = atoull(TokStart+1, CurPtr); 289 if ((unsigned)Val != Val) 290 GenerateError("Invalid value number (too large)!"); 291 llvmAsmlval.UIntVal = unsigned(Val); 292 return GLOBALVAL_ID; 293 } 294 295 return '@'; 296} 297 298 299/// LexPercent - Lex all tokens that start with a % character: 300/// PctStringConstant %\"[^\"]*\" 301/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* 302/// LocalVarID %[0-9]+ 303int LLLexer::LexPercent() { 304 // Handle PctStringConstant: %\"[^\"]*\" 305 if (CurPtr[0] == '"') { 306 ++CurPtr; 307 308 while (1) { 309 int CurChar = getNextChar(); 310 311 if (CurChar == EOF) { 312 GenerateError("End of file in local variable name"); 313 return YYERROR; 314 } 315 if (CurChar == '"') { 316 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 317 UnEscapeLexed(*llvmAsmlval.StrVal); 318 return PCTSTRINGCONSTANT; 319 } 320 } 321 } 322 323 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 324 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 325 CurPtr[0] == '.' || CurPtr[0] == '_') { 326 ++CurPtr; 327 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 328 CurPtr[0] == '.' || CurPtr[0] == '_') 329 ++CurPtr; 330 331 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % 332 return LOCALVAR; 333 } 334 335 // Handle LocalVarID: %[0-9]+ 336 if (isdigit(CurPtr[0])) { 337 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); 338 339 uint64_t Val = atoull(TokStart+1, CurPtr); 340 if ((unsigned)Val != Val) 341 GenerateError("Invalid value number (too large)!"); 342 llvmAsmlval.UIntVal = unsigned(Val); 343 return LOCALVAL_ID; 344 } 345 346 return '%'; 347} 348 349/// LexQuote - Lex all tokens that start with a " character: 350/// QuoteLabel "[^"]+": 351/// StringConstant "[^"]*" 352int LLLexer::LexQuote() { 353 while (1) { 354 int CurChar = getNextChar(); 355 356 if (CurChar == EOF) { 357 GenerateError("End of file in quoted string"); 358 return YYERROR; 359 } 360 361 if (CurChar != '"') continue; 362 363 if (CurPtr[0] != ':') { 364 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); 365 UnEscapeLexed(*llvmAsmlval.StrVal); 366 return STRINGCONSTANT; 367 } 368 369 ++CurPtr; 370 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); 371 UnEscapeLexed(*llvmAsmlval.StrVal); 372 return LABELSTR; 373 } 374} 375 376static bool JustWhitespaceNewLine(const char *&Ptr) { 377 const char *ThisPtr = Ptr; 378 while (*ThisPtr == ' ' || *ThisPtr == '\t') 379 ++ThisPtr; 380 if (*ThisPtr == '\n' || *ThisPtr == '\r') { 381 Ptr = ThisPtr; 382 return true; 383 } 384 return false; 385} 386 387 388/// LexIdentifier: Handle several related productions: 389/// Label [-a-zA-Z$._0-9]+: 390/// IntegerType i[0-9]+ 391/// Keyword sdiv, float, ... 392/// HexIntConstant [us]0x[0-9A-Fa-f]+ 393int LLLexer::LexIdentifier() { 394 const char *StartChar = CurPtr; 395 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 396 const char *KeywordEnd = 0; 397 398 for (; isLabelChar(*CurPtr); ++CurPtr) { 399 // If we decide this is an integer, remember the end of the sequence. 400 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; 401 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; 402 } 403 404 // If we stopped due to a colon, this really is a label. 405 if (*CurPtr == ':') { 406 llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); 407 return LABELSTR; 408 } 409 410 // Otherwise, this wasn't a label. If this was valid as an integer type, 411 // return it. 412 if (IntEnd == 0) IntEnd = CurPtr; 413 if (IntEnd != StartChar) { 414 CurPtr = IntEnd; 415 uint64_t NumBits = atoull(StartChar, CurPtr); 416 if (NumBits < IntegerType::MIN_INT_BITS || 417 NumBits > IntegerType::MAX_INT_BITS) { 418 GenerateError("Bitwidth for integer type out of range!"); 419 return YYERROR; 420 } 421 const Type* Ty = IntegerType::get(NumBits); 422 llvmAsmlval.PrimType = Ty; 423 return INTTYPE; 424 } 425 426 // Otherwise, this was a letter sequence. See which keyword this is. 427 if (KeywordEnd == 0) KeywordEnd = CurPtr; 428 CurPtr = KeywordEnd; 429 --StartChar; 430 unsigned Len = CurPtr-StartChar; 431#define KEYWORD(STR, TOK) \ 432 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; 433 434 KEYWORD("begin", BEGINTOK); 435 KEYWORD("end", ENDTOK); 436 KEYWORD("true", TRUETOK); 437 KEYWORD("false", FALSETOK); 438 KEYWORD("declare", DECLARE); 439 KEYWORD("define", DEFINE); 440 KEYWORD("global", GLOBAL); 441 KEYWORD("constant", CONSTANT); 442 443 KEYWORD("internal", INTERNAL); 444 KEYWORD("linkonce", LINKONCE); 445 KEYWORD("weak", WEAK); 446 KEYWORD("appending", APPENDING); 447 KEYWORD("dllimport", DLLIMPORT); 448 KEYWORD("dllexport", DLLEXPORT); 449 KEYWORD("hidden", HIDDEN); 450 KEYWORD("protected", PROTECTED); 451 KEYWORD("extern_weak", EXTERN_WEAK); 452 KEYWORD("external", EXTERNAL); 453 KEYWORD("thread_local", THREAD_LOCAL); 454 KEYWORD("zeroinitializer", ZEROINITIALIZER); 455 KEYWORD("undef", UNDEF); 456 KEYWORD("null", NULL_TOK); 457 KEYWORD("to", TO); 458 KEYWORD("tail", TAIL); 459 KEYWORD("target", TARGET); 460 KEYWORD("triple", TRIPLE); 461 KEYWORD("deplibs", DEPLIBS); 462 KEYWORD("datalayout", DATALAYOUT); 463 KEYWORD("volatile", VOLATILE); 464 KEYWORD("align", ALIGN); 465 KEYWORD("section", SECTION); 466 KEYWORD("alias", ALIAS); 467 KEYWORD("module", MODULE); 468 KEYWORD("asm", ASM_TOK); 469 KEYWORD("sideeffect", SIDEEFFECT); 470 471 KEYWORD("cc", CC_TOK); 472 KEYWORD("ccc", CCC_TOK); 473 KEYWORD("fastcc", FASTCC_TOK); 474 KEYWORD("coldcc", COLDCC_TOK); 475 KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); 476 KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); 477 478 KEYWORD("signext", SIGNEXT); 479 KEYWORD("zeroext", ZEROEXT); 480 KEYWORD("inreg", INREG); 481 KEYWORD("sret", SRET); 482 KEYWORD("nounwind", NOUNWIND); 483 KEYWORD("noreturn", NORETURN); 484 KEYWORD("noalias", NOALIAS); 485 KEYWORD("byval", BYVAL); 486 KEYWORD("nest", NEST); 487 KEYWORD("pure", PURE); 488 KEYWORD("const", CONST); 489 490 KEYWORD("type", TYPE); 491 KEYWORD("opaque", OPAQUE); 492 493 KEYWORD("eq" , EQ); 494 KEYWORD("ne" , NE); 495 KEYWORD("slt", SLT); 496 KEYWORD("sgt", SGT); 497 KEYWORD("sle", SLE); 498 KEYWORD("sge", SGE); 499 KEYWORD("ult", ULT); 500 KEYWORD("ugt", UGT); 501 KEYWORD("ule", ULE); 502 KEYWORD("uge", UGE); 503 KEYWORD("oeq", OEQ); 504 KEYWORD("one", ONE); 505 KEYWORD("olt", OLT); 506 KEYWORD("ogt", OGT); 507 KEYWORD("ole", OLE); 508 KEYWORD("oge", OGE); 509 KEYWORD("ord", ORD); 510 KEYWORD("uno", UNO); 511 KEYWORD("ueq", UEQ); 512 KEYWORD("une", UNE); 513#undef KEYWORD 514 515 // Keywords for types. 516#define TYPEKEYWORD(STR, LLVMTY, TOK) \ 517 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 518 llvmAsmlval.PrimType = LLVMTY; return TOK; } 519 TYPEKEYWORD("void", Type::VoidTy, VOID); 520 TYPEKEYWORD("float", Type::FloatTy, FLOAT); 521 TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); 522 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); 523 TYPEKEYWORD("fp128", Type::FP128Ty, FP128); 524 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); 525 TYPEKEYWORD("label", Type::LabelTy, LABEL); 526#undef TYPEKEYWORD 527 528 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is 529 // to avoid conflicting with the sext/zext instructions, below. 530 if (Len == 4 && !memcmp(StartChar, "sext", 4)) { 531 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 532 if (JustWhitespaceNewLine(CurPtr)) 533 return SIGNEXT; 534 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { 535 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 536 if (JustWhitespaceNewLine(CurPtr)) 537 return ZEROEXT; 538 } 539 540 // Keywords for instructions. 541#define INSTKEYWORD(STR, type, Enum, TOK) \ 542 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 543 llvmAsmlval.type = Instruction::Enum; return TOK; } 544 545 INSTKEYWORD("add", BinaryOpVal, Add, ADD); 546 INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); 547 INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); 548 INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); 549 INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); 550 INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); 551 INSTKEYWORD("urem", BinaryOpVal, URem, UREM); 552 INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); 553 INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); 554 INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); 555 INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); 556 INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); 557 INSTKEYWORD("and", BinaryOpVal, And, AND); 558 INSTKEYWORD("or", BinaryOpVal, Or , OR ); 559 INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); 560 INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); 561 INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); 562 563 INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); 564 INSTKEYWORD("call", OtherOpVal, Call, CALL); 565 INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); 566 INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); 567 INSTKEYWORD("sext", CastOpVal, SExt, SEXT); 568 INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); 569 INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); 570 INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); 571 INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); 572 INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); 573 INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); 574 INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); 575 INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); 576 INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); 577 INSTKEYWORD("select", OtherOpVal, Select, SELECT); 578 INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); 579 INSTKEYWORD("ret", TermOpVal, Ret, RET); 580 INSTKEYWORD("br", TermOpVal, Br, BR); 581 INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); 582 INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); 583 INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); 584 INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); 585 586 INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); 587 INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); 588 INSTKEYWORD("free", MemOpVal, Free, FREE); 589 INSTKEYWORD("load", MemOpVal, Load, LOAD); 590 INSTKEYWORD("store", MemOpVal, Store, STORE); 591 INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); 592 593 INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); 594 INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); 595 INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); 596#undef INSTKEYWORD 597 598 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 599 // the CFE to avoid forcing it to deal with 64-bit numbers. 600 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 601 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { 602 int len = CurPtr-TokStart-3; 603 uint32_t bits = len * 4; 604 APInt Tmp(bits, TokStart+3, len, 16); 605 uint32_t activeBits = Tmp.getActiveBits(); 606 if (activeBits > 0 && activeBits < bits) 607 Tmp.trunc(activeBits); 608 if (Tmp.getBitWidth() > 64) { 609 llvmAsmlval.APIntVal = new APInt(Tmp); 610 return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; 611 } else if (TokStart[0] == 's') { 612 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 613 return ESINT64VAL; 614 } else { 615 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 616 return EUINT64VAL; 617 } 618 } 619 620 // Finally, if this is "cc1234", return this as just "cc". 621 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 622 CurPtr = TokStart+2; 623 return CC_TOK; 624 } 625 626 // Finally, if this isn't known, return just a single character. 627 CurPtr = TokStart+1; 628 return TokStart[0]; 629} 630 631 632/// Lex0x: Handle productions that start with 0x, knowing that it matches and 633/// that this is not a label: 634/// HexFPConstant 0x[0-9A-Fa-f]+ 635/// HexFP80Constant 0xK[0-9A-Fa-f]+ 636/// HexFP128Constant 0xL[0-9A-Fa-f]+ 637/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 638int LLLexer::Lex0x() { 639 CurPtr = TokStart + 2; 640 641 char Kind; 642 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { 643 Kind = *CurPtr++; 644 } else { 645 Kind = 'J'; 646 } 647 648 if (!isxdigit(CurPtr[0])) { 649 // Bad token, return it as just zero. 650 CurPtr = TokStart+1; 651 return '0'; 652 } 653 654 while (isxdigit(CurPtr[0])) 655 ++CurPtr; 656 657 if (Kind == 'J') { 658 // HexFPConstant - Floating point constant represented in IEEE format as a 659 // hexadecimal number for when exponential notation is not precise enough. 660 // Float and double only. 661 llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); 662 return FPVAL; 663 } 664 665 uint64_t Pair[2]; 666 HexToIntPair(TokStart+3, CurPtr, Pair); 667 switch (Kind) { 668 default: assert(0 && "Unknown kind!"); 669 case 'K': 670 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 671 llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); 672 return FPVAL; 673 case 'L': 674 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 675 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); 676 return FPVAL; 677 case 'M': 678 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 679 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); 680 return FPVAL; 681 } 682} 683 684/// LexIdentifier: Handle several related productions: 685/// Label [-a-zA-Z$._0-9]+: 686/// NInteger -[0-9]+ 687/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 688/// PInteger [0-9]+ 689/// HexFPConstant 0x[0-9A-Fa-f]+ 690/// HexFP80Constant 0xK[0-9A-Fa-f]+ 691/// HexFP128Constant 0xL[0-9A-Fa-f]+ 692/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 693int LLLexer::LexDigitOrNegative() { 694 // If the letter after the negative is a number, this is probably a label. 695 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { 696 // Okay, this is not a number after the -, it's probably a label. 697 if (const char *End = isLabelTail(CurPtr)) { 698 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 699 CurPtr = End; 700 return LABELSTR; 701 } 702 703 return CurPtr[-1]; 704 } 705 706 // At this point, it is either a label, int or fp constant. 707 708 // Skip digits, we have at least one. 709 for (; isdigit(CurPtr[0]); ++CurPtr); 710 711 // Check to see if this really is a label afterall, e.g. "-1:". 712 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 713 if (const char *End = isLabelTail(CurPtr)) { 714 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 715 CurPtr = End; 716 return LABELSTR; 717 } 718 } 719 720 // If the next character is a '.', then it is a fp value, otherwise its 721 // integer. 722 if (CurPtr[0] != '.') { 723 if (TokStart[0] == '0' && TokStart[1] == 'x') 724 return Lex0x(); 725 unsigned Len = CurPtr-TokStart; 726 uint32_t numBits = ((Len * 64) / 19) + 2; 727 APInt Tmp(numBits, TokStart, Len, 10); 728 if (TokStart[0] == '-') { 729 uint32_t minBits = Tmp.getMinSignedBits(); 730 if (minBits > 0 && minBits < numBits) 731 Tmp.trunc(minBits); 732 if (Tmp.getBitWidth() > 64) { 733 llvmAsmlval.APIntVal = new APInt(Tmp); 734 return ESAPINTVAL; 735 } else { 736 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 737 return ESINT64VAL; 738 } 739 } else { 740 uint32_t activeBits = Tmp.getActiveBits(); 741 if (activeBits > 0 && activeBits < numBits) 742 Tmp.trunc(activeBits); 743 if (Tmp.getBitWidth() > 64) { 744 llvmAsmlval.APIntVal = new APInt(Tmp); 745 return EUAPINTVAL; 746 } else { 747 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 748 return EUINT64VAL; 749 } 750 } 751 } 752 753 ++CurPtr; 754 755 // Skip over [0-9]*([eE][-+]?[0-9]+)? 756 while (isdigit(CurPtr[0])) ++CurPtr; 757 758 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 759 if (isdigit(CurPtr[1]) || 760 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 761 CurPtr += 2; 762 while (isdigit(CurPtr[0])) ++CurPtr; 763 } 764 } 765 766 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 767 return FPVAL; 768} 769 770/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 771int LLLexer::LexPositive() { 772 // If the letter after the negative is a number, this is probably not a 773 // label. 774 if (!isdigit(CurPtr[0])) 775 return CurPtr[-1]; 776 777 // Skip digits. 778 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); 779 780 // At this point, we need a '.'. 781 if (CurPtr[0] != '.') { 782 CurPtr = TokStart+1; 783 return TokStart[0]; 784 } 785 786 ++CurPtr; 787 788 // Skip over [0-9]*([eE][-+]?[0-9]+)? 789 while (isdigit(CurPtr[0])) ++CurPtr; 790 791 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 792 if (isdigit(CurPtr[1]) || 793 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 794 CurPtr += 2; 795 while (isdigit(CurPtr[0])) ++CurPtr; 796 } 797 } 798 799 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 800 return FPVAL; 801} 802 803 804//===----------------------------------------------------------------------===// 805// Define the interface to this file. 806//===----------------------------------------------------------------------===// 807 808static LLLexer *TheLexer; 809 810void InitLLLexer(llvm::MemoryBuffer *MB) { 811 assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); 812 TheLexer = new LLLexer(MB); 813} 814 815int llvmAsmlex() { 816 return TheLexer->LexToken(); 817} 818const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } 819unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } 820std::string LLLgetFilename() { return TheLexer->getFilename(); } 821unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } 822 823void FreeLexer() { 824 delete TheLexer; 825 TheLexer = 0; 826} 827