LLLexer.cpp revision ae3a0be92e33bc716722aa600983fc1535acb122
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "llvm/DerivedTypes.h" 16#include "llvm/Instruction.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/Support/MathExtras.h" 19#include "llvm/Support/raw_ostream.h" 20#include "llvm/Assembly/Parser.h" 21#include <cstdlib> 22#include <cstring> 23using namespace llvm; 24 25bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const { 26 // Scan backward to find the start of the line. 27 const char *LineStart = ErrorLoc; 28 while (LineStart != CurBuf->getBufferStart() && 29 LineStart[-1] != '\n' && LineStart[-1] != '\r') 30 --LineStart; 31 // Get the end of the line. 32 const char *LineEnd = ErrorLoc; 33 while (LineEnd != CurBuf->getBufferEnd() && 34 LineEnd[0] != '\n' && LineEnd[0] != '\r') 35 ++LineEnd; 36 37 unsigned LineNo = 1; 38 for (const char *FP = CurBuf->getBufferStart(); FP != ErrorLoc; ++FP) 39 if (*FP == '\n') ++LineNo; 40 41 std::string LineContents(LineStart, LineEnd); 42 ErrorInfo.setError(Msg, LineNo, ErrorLoc-LineStart, LineContents); 43 return true; 44} 45 46//===----------------------------------------------------------------------===// 47// Helper functions. 48//===----------------------------------------------------------------------===// 49 50// atoull - Convert an ascii string of decimal digits into the unsigned long 51// long representation... this does not have to do input error checking, 52// because we know that the input will be matched by a suitable regex... 53// 54uint64_t LLLexer::atoull(const char *Buffer, const char *End) { 55 uint64_t Result = 0; 56 for (; Buffer != End; Buffer++) { 57 uint64_t OldRes = Result; 58 Result *= 10; 59 Result += *Buffer-'0'; 60 if (Result < OldRes) { // Uh, oh, overflow detected!!! 61 Error("constant bigger than 64 bits detected!"); 62 return 0; 63 } 64 } 65 return Result; 66} 67 68uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { 69 uint64_t Result = 0; 70 for (; Buffer != End; ++Buffer) { 71 uint64_t OldRes = Result; 72 Result *= 16; 73 char C = *Buffer; 74 if (C >= '0' && C <= '9') 75 Result += C-'0'; 76 else if (C >= 'A' && C <= 'F') 77 Result += C-'A'+10; 78 else if (C >= 'a' && C <= 'f') 79 Result += C-'a'+10; 80 81 if (Result < OldRes) { // Uh, oh, overflow detected!!! 82 Error("constant bigger than 64 bits detected!"); 83 return 0; 84 } 85 } 86 return Result; 87} 88 89void LLLexer::HexToIntPair(const char *Buffer, const char *End, 90 uint64_t Pair[2]) { 91 Pair[0] = 0; 92 for (int i=0; i<16; i++, Buffer++) { 93 assert(Buffer != End); 94 Pair[0] *= 16; 95 char C = *Buffer; 96 if (C >= '0' && C <= '9') 97 Pair[0] += C-'0'; 98 else if (C >= 'A' && C <= 'F') 99 Pair[0] += C-'A'+10; 100 else if (C >= 'a' && C <= 'f') 101 Pair[0] += C-'a'+10; 102 } 103 Pair[1] = 0; 104 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 105 Pair[1] *= 16; 106 char C = *Buffer; 107 if (C >= '0' && C <= '9') 108 Pair[1] += C-'0'; 109 else if (C >= 'A' && C <= 'F') 110 Pair[1] += C-'A'+10; 111 else if (C >= 'a' && C <= 'f') 112 Pair[1] += C-'a'+10; 113 } 114 if (Buffer != End) 115 Error("constant bigger than 128 bits detected!"); 116} 117 118/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into 119/// { low64, high16 } as usual for an APInt. 120void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, 121 uint64_t Pair[2]) { 122 Pair[1] = 0; 123 for (int i=0; i<4 && Buffer != End; i++, Buffer++) { 124 assert(Buffer != End); 125 Pair[1] *= 16; 126 char C = *Buffer; 127 if (C >= '0' && C <= '9') 128 Pair[1] += C-'0'; 129 else if (C >= 'A' && C <= 'F') 130 Pair[1] += C-'A'+10; 131 else if (C >= 'a' && C <= 'f') 132 Pair[1] += C-'a'+10; 133 } 134 Pair[0] = 0; 135 for (int i=0; i<16; i++, Buffer++) { 136 Pair[0] *= 16; 137 char C = *Buffer; 138 if (C >= '0' && C <= '9') 139 Pair[0] += C-'0'; 140 else if (C >= 'A' && C <= 'F') 141 Pair[0] += C-'A'+10; 142 else if (C >= 'a' && C <= 'f') 143 Pair[0] += C-'a'+10; 144 } 145 if (Buffer != End) 146 Error("constant bigger than 128 bits detected!"); 147} 148 149// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 150// appropriate character. 151static void UnEscapeLexed(std::string &Str) { 152 if (Str.empty()) return; 153 154 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 155 char *BOut = Buffer; 156 for (char *BIn = Buffer; BIn != EndBuffer; ) { 157 if (BIn[0] == '\\') { 158 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 159 *BOut++ = '\\'; // Two \ becomes one 160 BIn += 2; 161 } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { 162 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string 163 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number 164 BIn[3] = Tmp; // Restore character 165 BIn += 3; // Skip over handled chars 166 ++BOut; 167 } else { 168 *BOut++ = *BIn++; 169 } 170 } else { 171 *BOut++ = *BIn++; 172 } 173 } 174 Str.resize(BOut-Buffer); 175} 176 177/// isLabelChar - Return true for [-a-zA-Z$._0-9]. 178static bool isLabelChar(char C) { 179 return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; 180} 181 182 183/// isLabelTail - Return true if this pointer points to a valid end of a label. 184static const char *isLabelTail(const char *CurPtr) { 185 while (1) { 186 if (CurPtr[0] == ':') return CurPtr+1; 187 if (!isLabelChar(CurPtr[0])) return 0; 188 ++CurPtr; 189 } 190} 191 192 193 194//===----------------------------------------------------------------------===// 195// Lexer definition. 196//===----------------------------------------------------------------------===// 197 198LLLexer::LLLexer(MemoryBuffer *StartBuf, ParseError &Err) 199 : CurBuf(StartBuf), ErrorInfo(Err), APFloatVal(0.0) { 200 CurPtr = CurBuf->getBufferStart(); 201} 202 203std::string LLLexer::getFilename() const { 204 return CurBuf->getBufferIdentifier(); 205} 206 207int LLLexer::getNextChar() { 208 char CurChar = *CurPtr++; 209 switch (CurChar) { 210 default: return (unsigned char)CurChar; 211 case 0: 212 // A nul character in the stream is either the end of the current buffer or 213 // a random nul in the file. Disambiguate that here. 214 if (CurPtr-1 != CurBuf->getBufferEnd()) 215 return 0; // Just whitespace. 216 217 // Otherwise, return end of file. 218 --CurPtr; // Another call to lex will return EOF again. 219 return EOF; 220 } 221} 222 223 224lltok::Kind LLLexer::LexToken() { 225 TokStart = CurPtr; 226 227 int CurChar = getNextChar(); 228 switch (CurChar) { 229 default: 230 // Handle letters: [a-zA-Z_] 231 if (isalpha(CurChar) || CurChar == '_') 232 return LexIdentifier(); 233 234 return lltok::Error; 235 case EOF: return lltok::Eof; 236 case 0: 237 case ' ': 238 case '\t': 239 case '\n': 240 case '\r': 241 // Ignore whitespace. 242 return LexToken(); 243 case '+': return LexPositive(); 244 case '@': return LexAt(); 245 case '%': return LexPercent(); 246 case '"': return LexQuote(); 247 case '.': 248 if (const char *Ptr = isLabelTail(CurPtr)) { 249 CurPtr = Ptr; 250 StrVal.assign(TokStart, CurPtr-1); 251 return lltok::LabelStr; 252 } 253 if (CurPtr[0] == '.' && CurPtr[1] == '.') { 254 CurPtr += 2; 255 return lltok::dotdotdot; 256 } 257 return lltok::Error; 258 case '$': 259 if (const char *Ptr = isLabelTail(CurPtr)) { 260 CurPtr = Ptr; 261 StrVal.assign(TokStart, CurPtr-1); 262 return lltok::LabelStr; 263 } 264 return lltok::Error; 265 case ';': 266 SkipLineComment(); 267 return LexToken(); 268 case '!': return lltok::Metadata; 269 case '0': case '1': case '2': case '3': case '4': 270 case '5': case '6': case '7': case '8': case '9': 271 case '-': 272 return LexDigitOrNegative(); 273 case '=': return lltok::equal; 274 case '[': return lltok::lsquare; 275 case ']': return lltok::rsquare; 276 case '{': return lltok::lbrace; 277 case '}': return lltok::rbrace; 278 case '<': return lltok::less; 279 case '>': return lltok::greater; 280 case '(': return lltok::lparen; 281 case ')': return lltok::rparen; 282 case ',': return lltok::comma; 283 case '*': return lltok::star; 284 case '\\': return lltok::backslash; 285 } 286} 287 288void LLLexer::SkipLineComment() { 289 while (1) { 290 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 291 return; 292 } 293} 294 295/// LexAt - Lex all tokens that start with an @ character: 296/// GlobalVar @\"[^\"]*\" 297/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* 298/// GlobalVarID @[0-9]+ 299lltok::Kind LLLexer::LexAt() { 300 // Handle AtStringConstant: @\"[^\"]*\" 301 if (CurPtr[0] == '"') { 302 ++CurPtr; 303 304 while (1) { 305 int CurChar = getNextChar(); 306 307 if (CurChar == EOF) { 308 Error("end of file in global variable name"); 309 return lltok::Error; 310 } 311 if (CurChar == '"') { 312 StrVal.assign(TokStart+2, CurPtr-1); 313 UnEscapeLexed(StrVal); 314 return lltok::GlobalVar; 315 } 316 } 317 } 318 319 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 320 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 321 CurPtr[0] == '.' || CurPtr[0] == '_') { 322 ++CurPtr; 323 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 324 CurPtr[0] == '.' || CurPtr[0] == '_') 325 ++CurPtr; 326 327 StrVal.assign(TokStart+1, CurPtr); // Skip @ 328 return lltok::GlobalVar; 329 } 330 331 // Handle GlobalVarID: @[0-9]+ 332 if (isdigit(CurPtr[0])) { 333 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 334 /*empty*/; 335 336 uint64_t Val = atoull(TokStart+1, CurPtr); 337 if ((unsigned)Val != Val) 338 Error("invalid value number (too large)!"); 339 UIntVal = unsigned(Val); 340 return lltok::GlobalID; 341 } 342 343 return lltok::Error; 344} 345 346 347/// LexPercent - Lex all tokens that start with a % character: 348/// LocalVar ::= %\"[^\"]*\" 349/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* 350/// LocalVarID ::= %[0-9]+ 351lltok::Kind LLLexer::LexPercent() { 352 // Handle LocalVarName: %\"[^\"]*\" 353 if (CurPtr[0] == '"') { 354 ++CurPtr; 355 356 while (1) { 357 int CurChar = getNextChar(); 358 359 if (CurChar == EOF) { 360 Error("end of file in string constant"); 361 return lltok::Error; 362 } 363 if (CurChar == '"') { 364 StrVal.assign(TokStart+2, CurPtr-1); 365 UnEscapeLexed(StrVal); 366 return lltok::LocalVar; 367 } 368 } 369 } 370 371 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 372 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 373 CurPtr[0] == '.' || CurPtr[0] == '_') { 374 ++CurPtr; 375 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 376 CurPtr[0] == '.' || CurPtr[0] == '_') 377 ++CurPtr; 378 379 StrVal.assign(TokStart+1, CurPtr); // Skip % 380 return lltok::LocalVar; 381 } 382 383 // Handle LocalVarID: %[0-9]+ 384 if (isdigit(CurPtr[0])) { 385 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 386 /*empty*/; 387 388 uint64_t Val = atoull(TokStart+1, CurPtr); 389 if ((unsigned)Val != Val) 390 Error("invalid value number (too large)!"); 391 UIntVal = unsigned(Val); 392 return lltok::LocalVarID; 393 } 394 395 return lltok::Error; 396} 397 398/// LexQuote - Lex all tokens that start with a " character: 399/// QuoteLabel "[^"]+": 400/// StringConstant "[^"]*" 401lltok::Kind LLLexer::LexQuote() { 402 while (1) { 403 int CurChar = getNextChar(); 404 405 if (CurChar == EOF) { 406 Error("end of file in quoted string"); 407 return lltok::Error; 408 } 409 410 if (CurChar != '"') continue; 411 412 if (CurPtr[0] != ':') { 413 StrVal.assign(TokStart+1, CurPtr-1); 414 UnEscapeLexed(StrVal); 415 return lltok::StringConstant; 416 } 417 418 ++CurPtr; 419 StrVal.assign(TokStart+1, CurPtr-2); 420 UnEscapeLexed(StrVal); 421 return lltok::LabelStr; 422 } 423} 424 425static bool JustWhitespaceNewLine(const char *&Ptr) { 426 const char *ThisPtr = Ptr; 427 while (*ThisPtr == ' ' || *ThisPtr == '\t') 428 ++ThisPtr; 429 if (*ThisPtr == '\n' || *ThisPtr == '\r') { 430 Ptr = ThisPtr; 431 return true; 432 } 433 return false; 434} 435 436 437/// LexIdentifier: Handle several related productions: 438/// Label [-a-zA-Z$._0-9]+: 439/// IntegerType i[0-9]+ 440/// Keyword sdiv, float, ... 441/// HexIntConstant [us]0x[0-9A-Fa-f]+ 442lltok::Kind LLLexer::LexIdentifier() { 443 const char *StartChar = CurPtr; 444 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 445 const char *KeywordEnd = 0; 446 447 for (; isLabelChar(*CurPtr); ++CurPtr) { 448 // If we decide this is an integer, remember the end of the sequence. 449 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; 450 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; 451 } 452 453 // If we stopped due to a colon, this really is a label. 454 if (*CurPtr == ':') { 455 StrVal.assign(StartChar-1, CurPtr++); 456 return lltok::LabelStr; 457 } 458 459 // Otherwise, this wasn't a label. If this was valid as an integer type, 460 // return it. 461 if (IntEnd == 0) IntEnd = CurPtr; 462 if (IntEnd != StartChar) { 463 CurPtr = IntEnd; 464 uint64_t NumBits = atoull(StartChar, CurPtr); 465 if (NumBits < IntegerType::MIN_INT_BITS || 466 NumBits > IntegerType::MAX_INT_BITS) { 467 Error("bitwidth for integer type out of range!"); 468 return lltok::Error; 469 } 470 TyVal = IntegerType::get(NumBits); 471 return lltok::Type; 472 } 473 474 // Otherwise, this was a letter sequence. See which keyword this is. 475 if (KeywordEnd == 0) KeywordEnd = CurPtr; 476 CurPtr = KeywordEnd; 477 --StartChar; 478 unsigned Len = CurPtr-StartChar; 479#define KEYWORD(STR) \ 480 if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ 481 return lltok::kw_##STR; 482 483 KEYWORD(begin); KEYWORD(end); 484 KEYWORD(true); KEYWORD(false); 485 KEYWORD(declare); KEYWORD(define); 486 KEYWORD(global); KEYWORD(constant); 487 488 KEYWORD(private); 489 KEYWORD(internal); 490 KEYWORD(available_externally); 491 KEYWORD(linkonce); 492 KEYWORD(linkonce_odr); 493 KEYWORD(weak); 494 KEYWORD(weak_odr); 495 KEYWORD(appending); 496 KEYWORD(dllimport); 497 KEYWORD(dllexport); 498 KEYWORD(common); 499 KEYWORD(default); 500 KEYWORD(hidden); 501 KEYWORD(protected); 502 KEYWORD(extern_weak); 503 KEYWORD(external); 504 KEYWORD(thread_local); 505 KEYWORD(zeroinitializer); 506 KEYWORD(undef); 507 KEYWORD(null); 508 KEYWORD(to); 509 KEYWORD(tail); 510 KEYWORD(target); 511 KEYWORD(triple); 512 KEYWORD(deplibs); 513 KEYWORD(datalayout); 514 KEYWORD(volatile); 515 KEYWORD(align); 516 KEYWORD(addrspace); 517 KEYWORD(section); 518 KEYWORD(alias); 519 KEYWORD(module); 520 KEYWORD(asm); 521 KEYWORD(sideeffect); 522 KEYWORD(gc); 523 524 KEYWORD(ccc); 525 KEYWORD(fastcc); 526 KEYWORD(coldcc); 527 KEYWORD(x86_stdcallcc); 528 KEYWORD(x86_fastcallcc); 529 KEYWORD(cc); 530 KEYWORD(c); 531 532 KEYWORD(signext); 533 KEYWORD(zeroext); 534 KEYWORD(inreg); 535 KEYWORD(sret); 536 KEYWORD(nounwind); 537 KEYWORD(noreturn); 538 KEYWORD(noalias); 539 KEYWORD(nocapture); 540 KEYWORD(byval); 541 KEYWORD(nest); 542 KEYWORD(readnone); 543 KEYWORD(readonly); 544 545 KEYWORD(noinline); 546 KEYWORD(alwaysinline); 547 KEYWORD(optsize); 548 KEYWORD(ssp); 549 KEYWORD(sspreq); 550 KEYWORD(noredzone); 551 552 KEYWORD(type); 553 KEYWORD(opaque); 554 555 KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); 556 KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); 557 KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); 558 KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); 559 560 KEYWORD(x); 561#undef KEYWORD 562 563 // Keywords for types. 564#define TYPEKEYWORD(STR, LLVMTY) \ 565 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 566 TyVal = LLVMTY; return lltok::Type; } 567 TYPEKEYWORD("void", Type::VoidTy); 568 TYPEKEYWORD("float", Type::FloatTy); 569 TYPEKEYWORD("double", Type::DoubleTy); 570 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty); 571 TYPEKEYWORD("fp128", Type::FP128Ty); 572 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty); 573 TYPEKEYWORD("label", Type::LabelTy); 574 TYPEKEYWORD("metadata", Type::MetadataTy); 575#undef TYPEKEYWORD 576 577 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is 578 // to avoid conflicting with the sext/zext instructions, below. 579 if (Len == 4 && !memcmp(StartChar, "sext", 4)) { 580 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 581 if (JustWhitespaceNewLine(CurPtr)) 582 return lltok::kw_signext; 583 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { 584 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 585 if (JustWhitespaceNewLine(CurPtr)) 586 return lltok::kw_zeroext; 587 } 588 589 // Keywords for instructions. 590#define INSTKEYWORD(STR, Enum) \ 591 if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ 592 UIntVal = Instruction::Enum; return lltok::kw_##STR; } 593 594 INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); 595 INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); 596 INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); 597 INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); 598 INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); 599 INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); 600 INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); 601 INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); 602 INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp); 603 604 INSTKEYWORD(phi, PHI); 605 INSTKEYWORD(call, Call); 606 INSTKEYWORD(trunc, Trunc); 607 INSTKEYWORD(zext, ZExt); 608 INSTKEYWORD(sext, SExt); 609 INSTKEYWORD(fptrunc, FPTrunc); 610 INSTKEYWORD(fpext, FPExt); 611 INSTKEYWORD(uitofp, UIToFP); 612 INSTKEYWORD(sitofp, SIToFP); 613 INSTKEYWORD(fptoui, FPToUI); 614 INSTKEYWORD(fptosi, FPToSI); 615 INSTKEYWORD(inttoptr, IntToPtr); 616 INSTKEYWORD(ptrtoint, PtrToInt); 617 INSTKEYWORD(bitcast, BitCast); 618 INSTKEYWORD(select, Select); 619 INSTKEYWORD(va_arg, VAArg); 620 INSTKEYWORD(ret, Ret); 621 INSTKEYWORD(br, Br); 622 INSTKEYWORD(switch, Switch); 623 INSTKEYWORD(invoke, Invoke); 624 INSTKEYWORD(unwind, Unwind); 625 INSTKEYWORD(unreachable, Unreachable); 626 627 INSTKEYWORD(malloc, Malloc); 628 INSTKEYWORD(alloca, Alloca); 629 INSTKEYWORD(free, Free); 630 INSTKEYWORD(load, Load); 631 INSTKEYWORD(store, Store); 632 INSTKEYWORD(getelementptr, GetElementPtr); 633 634 INSTKEYWORD(extractelement, ExtractElement); 635 INSTKEYWORD(insertelement, InsertElement); 636 INSTKEYWORD(shufflevector, ShuffleVector); 637 INSTKEYWORD(getresult, ExtractValue); 638 INSTKEYWORD(extractvalue, ExtractValue); 639 INSTKEYWORD(insertvalue, InsertValue); 640#undef INSTKEYWORD 641 642 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 643 // the CFE to avoid forcing it to deal with 64-bit numbers. 644 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 645 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { 646 int len = CurPtr-TokStart-3; 647 uint32_t bits = len * 4; 648 APInt Tmp(bits, TokStart+3, len, 16); 649 uint32_t activeBits = Tmp.getActiveBits(); 650 if (activeBits > 0 && activeBits < bits) 651 Tmp.trunc(activeBits); 652 APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); 653 return lltok::APSInt; 654 } 655 656 // If this is "cc1234", return this as just "cc". 657 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 658 CurPtr = TokStart+2; 659 return lltok::kw_cc; 660 } 661 662 // If this starts with "call", return it as CALL. This is to support old 663 // broken .ll files. FIXME: remove this with LLVM 3.0. 664 if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { 665 CurPtr = TokStart+4; 666 UIntVal = Instruction::Call; 667 return lltok::kw_call; 668 } 669 670 // Finally, if this isn't known, return an error. 671 CurPtr = TokStart+1; 672 return lltok::Error; 673} 674 675 676/// Lex0x: Handle productions that start with 0x, knowing that it matches and 677/// that this is not a label: 678/// HexFPConstant 0x[0-9A-Fa-f]+ 679/// HexFP80Constant 0xK[0-9A-Fa-f]+ 680/// HexFP128Constant 0xL[0-9A-Fa-f]+ 681/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 682lltok::Kind LLLexer::Lex0x() { 683 CurPtr = TokStart + 2; 684 685 char Kind; 686 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { 687 Kind = *CurPtr++; 688 } else { 689 Kind = 'J'; 690 } 691 692 if (!isxdigit(CurPtr[0])) { 693 // Bad token, return it as an error. 694 CurPtr = TokStart+1; 695 return lltok::Error; 696 } 697 698 while (isxdigit(CurPtr[0])) 699 ++CurPtr; 700 701 if (Kind == 'J') { 702 // HexFPConstant - Floating point constant represented in IEEE format as a 703 // hexadecimal number for when exponential notation is not precise enough. 704 // Float and double only. 705 APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr))); 706 return lltok::APFloat; 707 } 708 709 uint64_t Pair[2]; 710 switch (Kind) { 711 default: assert(0 && "Unknown kind!"); 712 case 'K': 713 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 714 FP80HexToIntPair(TokStart+3, CurPtr, Pair); 715 APFloatVal = APFloat(APInt(80, 2, Pair)); 716 return lltok::APFloat; 717 case 'L': 718 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 719 HexToIntPair(TokStart+3, CurPtr, Pair); 720 APFloatVal = APFloat(APInt(128, 2, Pair), true); 721 return lltok::APFloat; 722 case 'M': 723 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 724 HexToIntPair(TokStart+3, CurPtr, Pair); 725 APFloatVal = APFloat(APInt(128, 2, Pair)); 726 return lltok::APFloat; 727 } 728} 729 730/// LexIdentifier: Handle several related productions: 731/// Label [-a-zA-Z$._0-9]+: 732/// NInteger -[0-9]+ 733/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 734/// PInteger [0-9]+ 735/// HexFPConstant 0x[0-9A-Fa-f]+ 736/// HexFP80Constant 0xK[0-9A-Fa-f]+ 737/// HexFP128Constant 0xL[0-9A-Fa-f]+ 738/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 739lltok::Kind LLLexer::LexDigitOrNegative() { 740 // If the letter after the negative is a number, this is probably a label. 741 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { 742 // Okay, this is not a number after the -, it's probably a label. 743 if (const char *End = isLabelTail(CurPtr)) { 744 StrVal.assign(TokStart, End-1); 745 CurPtr = End; 746 return lltok::LabelStr; 747 } 748 749 return lltok::Error; 750 } 751 752 // At this point, it is either a label, int or fp constant. 753 754 // Skip digits, we have at least one. 755 for (; isdigit(CurPtr[0]); ++CurPtr) 756 /*empty*/; 757 758 // Check to see if this really is a label afterall, e.g. "-1:". 759 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 760 if (const char *End = isLabelTail(CurPtr)) { 761 StrVal.assign(TokStart, End-1); 762 CurPtr = End; 763 return lltok::LabelStr; 764 } 765 } 766 767 // If the next character is a '.', then it is a fp value, otherwise its 768 // integer. 769 if (CurPtr[0] != '.') { 770 if (TokStart[0] == '0' && TokStart[1] == 'x') 771 return Lex0x(); 772 unsigned Len = CurPtr-TokStart; 773 uint32_t numBits = ((Len * 64) / 19) + 2; 774 APInt Tmp(numBits, TokStart, Len, 10); 775 if (TokStart[0] == '-') { 776 uint32_t minBits = Tmp.getMinSignedBits(); 777 if (minBits > 0 && minBits < numBits) 778 Tmp.trunc(minBits); 779 APSIntVal = APSInt(Tmp, false); 780 } else { 781 uint32_t activeBits = Tmp.getActiveBits(); 782 if (activeBits > 0 && activeBits < numBits) 783 Tmp.trunc(activeBits); 784 APSIntVal = APSInt(Tmp, true); 785 } 786 return lltok::APSInt; 787 } 788 789 ++CurPtr; 790 791 // Skip over [0-9]*([eE][-+]?[0-9]+)? 792 while (isdigit(CurPtr[0])) ++CurPtr; 793 794 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 795 if (isdigit(CurPtr[1]) || 796 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 797 CurPtr += 2; 798 while (isdigit(CurPtr[0])) ++CurPtr; 799 } 800 } 801 802 APFloatVal = APFloat(atof(TokStart)); 803 return lltok::APFloat; 804} 805 806/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 807lltok::Kind LLLexer::LexPositive() { 808 // If the letter after the negative is a number, this is probably not a 809 // label. 810 if (!isdigit(CurPtr[0])) 811 return lltok::Error; 812 813 // Skip digits. 814 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 815 /*empty*/; 816 817 // At this point, we need a '.'. 818 if (CurPtr[0] != '.') { 819 CurPtr = TokStart+1; 820 return lltok::Error; 821 } 822 823 ++CurPtr; 824 825 // Skip over [0-9]*([eE][-+]?[0-9]+)? 826 while (isdigit(CurPtr[0])) ++CurPtr; 827 828 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 829 if (isdigit(CurPtr[1]) || 830 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 831 CurPtr += 2; 832 while (isdigit(CurPtr[0])) ++CurPtr; 833 } 834 } 835 836 APFloatVal = APFloat(atof(TokStart)); 837 return lltok::APFloat; 838} 839