LLLexer.cpp revision 80a75bfae980df96f969f1c05b0c4a80ce975240
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "ParserInternals.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Support/MathExtras.h" 18 19#include <list> 20#include "llvmAsmParser.h" 21using namespace llvm; 22 23//===----------------------------------------------------------------------===// 24// Helper functions. 25//===----------------------------------------------------------------------===// 26 27// atoull - Convert an ascii string of decimal digits into the unsigned long 28// long representation... this does not have to do input error checking, 29// because we know that the input will be matched by a suitable regex... 30// 31static uint64_t atoull(const char *Buffer, const char *End) { 32 uint64_t Result = 0; 33 for (; Buffer != End; Buffer++) { 34 uint64_t OldRes = Result; 35 Result *= 10; 36 Result += *Buffer-'0'; 37 if (Result < OldRes) { // Uh, oh, overflow detected!!! 38 GenerateError("constant bigger than 64 bits detected!"); 39 return 0; 40 } 41 } 42 return Result; 43} 44 45static uint64_t HexIntToVal(const char *Buffer, const char *End) { 46 uint64_t Result = 0; 47 for (; Buffer != End; ++Buffer) { 48 uint64_t OldRes = Result; 49 Result *= 16; 50 char C = *Buffer; 51 if (C >= '0' && C <= '9') 52 Result += C-'0'; 53 else if (C >= 'A' && C <= 'F') 54 Result += C-'A'+10; 55 else if (C >= 'a' && C <= 'f') 56 Result += C-'a'+10; 57 58 if (Result < OldRes) { // Uh, oh, overflow detected!!! 
59 GenerateError("constant bigger than 64 bits detected!"); 60 return 0; 61 } 62 } 63 return Result; 64} 65 66// HexToFP - Convert the ascii string in hexadecimal format to the floating 67// point representation of it. 68// 69static double HexToFP(const char *Buffer, const char *End) { 70 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double 71} 72 73static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ 74 Pair[0] = 0; 75 for (int i=0; i<16; i++, Buffer++) { 76 assert(Buffer != End); 77 Pair[0] *= 16; 78 char C = *Buffer; 79 if (C >= '0' && C <= '9') 80 Pair[0] += C-'0'; 81 else if (C >= 'A' && C <= 'F') 82 Pair[0] += C-'A'+10; 83 else if (C >= 'a' && C <= 'f') 84 Pair[0] += C-'a'+10; 85 } 86 Pair[1] = 0; 87 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 88 Pair[1] *= 16; 89 char C = *Buffer; 90 if (C >= '0' && C <= '9') 91 Pair[1] += C-'0'; 92 else if (C >= 'A' && C <= 'F') 93 Pair[1] += C-'A'+10; 94 else if (C >= 'a' && C <= 'f') 95 Pair[1] += C-'a'+10; 96 } 97 if (Buffer != End) 98 GenerateError("constant bigger than 128 bits detected!"); 99} 100 101// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 102// appropriate character. 
static void UnEscapeLexed(std::string &Str) {
  if (Str.empty()) return;

  // The string is rewritten in place: BIn reads, BOut writes.  Every escape
  // shrinks the text, so BOut never gets ahead of BIn.
  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
  char *BOut = Buffer;
  for (char *BIn = Buffer; BIn != EndBuffer; ) {
    if (BIn[0] == '\\') {
      if (BIn < EndBuffer-1 && BIn[1] == '\\') {
        *BOut++ = '\\'; // Two \ becomes one
        BIn += 2;
      } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
        // Backslash followed by exactly two hex digits: temporarily
        // nul-terminate after the digits so strtol sees only those two.
        char Tmp = BIn[3]; BIn[3] = 0;      // Terminate string
        *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
        BIn[3] = Tmp;                       // Restore character
        BIn += 3;                           // Skip over handled chars
        ++BOut;
      } else {
        // A backslash that does not start a recognized escape is copied as is.
        *BOut++ = *BIn++;
      }
    } else {
      *BOut++ = *BIn++;
    }
  }
  // Drop the now-unused tail.
  Str.resize(BOut-Buffer);
}

/// isLabelChar - Return true for [-a-zA-Z$._0-9].
// NOTE(review): C is a plain char passed straight to isalnum; bytes with the
// high bit set would be negative where char is signed - confirm whether such
// input can reach here.
static bool isLabelChar(char C) {
  return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
}


/// isLabelTail - Return true if this pointer points to a valid end of a label.
/// More precisely: scan label characters starting at CurPtr; if a ':' is
/// reached, return the position just past it, otherwise (a non-label character
/// comes first) return null.
static const char *isLabelTail(const char *CurPtr) {
  while (1) {
    if (CurPtr[0] == ':') return CurPtr+1;
    if (!isLabelChar(CurPtr[0])) return 0;
    ++CurPtr;
  }
}



//===----------------------------------------------------------------------===//
// Lexer definition.
//===----------------------------------------------------------------------===//

// FIXME: REMOVE THIS.
// Token codes returned by the lexer for end of input and for a lexing error.
#define YYEOF 0
#define YYERROR -2

/// Construct a lexer positioned at the start of StartBuf, on line 1.
LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
  CurPtr = CurBuf->getBufferStart();
}

/// getFilename - Return the identifier of the buffer being lexed.
std::string LLLexer::getFilename() const {
  return CurBuf->getBufferIdentifier();
}

/// getNextChar - Consume and return the next character of the buffer.
/// Returns EOF at the end of the buffer (without consuming it), '\n' for any
/// newline sequence (bumping the line counter), 0 for a nul byte embedded in
/// the buffer, and otherwise the character as an unsigned value.
int LLLexer::getNextChar() {
  char CurChar = *CurPtr++;
  switch (CurChar) {
  default: return (unsigned char)CurChar;
  case 0:
    // A nul character in the stream is either the end of the current buffer or
    // a random nul in the file.  Disambiguate that here.
    if (CurPtr-1 != CurBuf->getBufferEnd())
      return 0;  // Just whitespace.

    // Otherwise, return end of file.
    --CurPtr;  // Another call to lex will return EOF again.
    return EOF;
  case '\n':
  case '\r':
    // Handle the newline character by ignoring it and incrementing the line
    // count.  However, be careful about 'dos style' files with \n\r in them.
    // Only treat a \n\r or \r\n as a single line.
    if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
        *CurPtr != CurChar)
      ++CurPtr;  // Eat the two char newline sequence.

    ++CurLineNo;
    return '\n';
  }
}


/// LexToken - Lex and return the next token.  TokStart is set to the first
/// character of the token; the first character selects the specialized lexing
/// routine.  Characters with no special handling are returned as themselves.
int LLLexer::LexToken() {
  TokStart = CurPtr;

  int CurChar = getNextChar();

  switch (CurChar) {
  default:
    // Handle letters: [a-zA-Z_]
    if (isalpha(CurChar) || CurChar == '_')
      return LexIdentifier();

    return CurChar;
  case EOF: return YYEOF;
  case 0:
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    // Ignore whitespace.
    // NOTE(review): skipping is done by recursing, one call per skipped
    // character/comment; a loop would avoid depending on tail-call
    // optimization.
    return LexToken();
  case '+': return LexPositive();
  case '@': return LexAt();
  case '%': return LexPercent();
  case '"': return LexQuote();
  case '.':
    // A '.' may start a label (e.g. ".foo:"); the label text excludes the ':'.
    if (const char *Ptr = isLabelTail(CurPtr)) {
      CurPtr = Ptr;
      llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1);
      return LABELSTR;
    }
    // Otherwise it is either "..." or a lone '.'.
    if (CurPtr[0] == '.' && CurPtr[1] == '.') {
      CurPtr += 2;
      return DOTDOTDOT;
    }
    return '.';
  case '$':
    // A '$' may start a label as well; otherwise it is returned as itself.
    if (const char *Ptr = isLabelTail(CurPtr)) {
      CurPtr = Ptr;
      llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1);
      return LABELSTR;
    }
    return '$';
  case ';':
    // Comments run to the end of the line and produce no token.
    SkipLineComment();
    return LexToken();
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '-':
    return LexDigitOrNegative();
  }
}

/// SkipLineComment - Advance CurPtr up to (but not past) the newline that ends
/// the current comment, or to the end of the buffer.
void LLLexer::SkipLineComment() {
  while (1) {
    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
      return;
  }
}

/// LexAt - Lex all tokens that start with an @ character:
///   AtStringConstant @\"[^\"]*\"
///   GlobalVarName    @[-a-zA-Z$._][-a-zA-Z$._0-9]*
///   GlobalVarID      @[0-9]+
/// If none of these match, a lone '@' is returned.
int LLLexer::LexAt() {
  // Handle AtStringConstant: @\"[^\"]*\"
  if (CurPtr[0] == '"') {
    ++CurPtr;

    while (1) {
      int CurChar = getNextChar();

      if (CurChar == EOF) {
        GenerateError("End of file in global variable name");
        return YYERROR;
      }
      if (CurChar == '"') {
        // TokStart+2 skips the leading @" and CurPtr-1 drops the closing quote.
        llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1);
        UnEscapeLexed(*llvmAsmlval.StrVal);
        return ATSTRINGCONSTANT;
      }
    }
  }

  // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
      CurPtr[0] == '.' || CurPtr[0] == '_') {
    ++CurPtr;
    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
           CurPtr[0] == '.' || CurPtr[0] == '_')
      ++CurPtr;

    llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr);   // Skip @
    return GLOBALVAR;
  }

  // Handle GlobalVarID: @[0-9]+
  if (isdigit(CurPtr[0])) {
    for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);

    uint64_t Val = atoull(TokStart+1, CurPtr);
    // The ID is stored as 'unsigned'; report values that do not fit.  The
    // (truncated) token is still returned after the error is reported.
    if ((unsigned)Val != Val)
      GenerateError("Invalid value number (too large)!");
    llvmAsmlval.UIntVal = unsigned(Val);
    return GLOBALVAL_ID;
  }

  return '@';
}


/// LexPercent - Lex all tokens that start with a % character:
///   PctStringConstant  %\"[^\"]*\"
///   LocalVarName       %[-a-zA-Z$._][-a-zA-Z$._0-9]*
///   LocalVarID         %[0-9]+
/// If none of these match, a lone '%' is returned.
int LLLexer::LexPercent() {
  // Handle PctStringConstant: %\"[^\"]*\"
  if (CurPtr[0] == '"') {
    ++CurPtr;

    while (1) {
      int CurChar = getNextChar();

      if (CurChar == EOF) {
        GenerateError("End of file in local variable name");
        return YYERROR;
      }
      if (CurChar == '"') {
        // TokStart+2 skips the leading %" and CurPtr-1 drops the closing quote.
        llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1);
        UnEscapeLexed(*llvmAsmlval.StrVal);
        return PCTSTRINGCONSTANT;
      }
    }
  }

  // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
      CurPtr[0] == '.' || CurPtr[0] == '_') {
    ++CurPtr;
    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
           CurPtr[0] == '.' || CurPtr[0] == '_')
      ++CurPtr;

    llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr);   // Skip %
    return LOCALVAR;
  }

  // Handle LocalVarID: %[0-9]+
  if (isdigit(CurPtr[0])) {
    for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);

    uint64_t Val = atoull(TokStart+1, CurPtr);
    // The ID is stored as 'unsigned'; report values that do not fit.  The
    // (truncated) token is still returned after the error is reported.
    if ((unsigned)Val != Val)
      GenerateError("Invalid value number (too large)!");
    llvmAsmlval.UIntVal = unsigned(Val);
    return LOCALVAL_ID;
  }

  return '%';
}

/// LexQuote - Lex all tokens that start with a " character:
///   QuoteLabel        "[^"]+":
///   StringConstant    "[^"]*"
int LLLexer::LexQuote() {
  while (1) {
    int CurChar = getNextChar();

    if (CurChar == EOF) {
      GenerateError("End of file in quoted string");
      return YYERROR;
    }

    if (CurChar != '"') continue;

    // Closing quote found.  Without a following ':' this is a plain string.
    if (CurPtr[0] != ':') {
      llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1);
      UnEscapeLexed(*llvmAsmlval.StrVal);
      return STRINGCONSTANT;
    }

    // Quoted label: consume the ':' and strip both quotes and the colon.
    ++CurPtr;
    llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2);
    UnEscapeLexed(*llvmAsmlval.StrVal);
    return LABELSTR;
  }
}

/// JustWhitespaceNewLine - Return true if only spaces and tabs separate Ptr
/// from the next newline character.  On success Ptr is advanced to that
/// newline; on failure Ptr is left unchanged.
static bool JustWhitespaceNewLine(const char *&Ptr) {
  const char *ThisPtr = Ptr;
  while (*ThisPtr == ' ' || *ThisPtr == '\t')
    ++ThisPtr;
  if (*ThisPtr == '\n' || *ThisPtr == '\r') {
    Ptr = ThisPtr;
    return true;
  }
  return false;
}


/// LexIdentifier: Handle several related productions:
///    Label           [-a-zA-Z$._0-9]+:
///    IntegerType     i[0-9]+
///    Keyword         sdiv, float, ...
///    HexIntConstant  [us]0x[0-9A-Fa-f]+
/// Called with CurPtr just past the first character of the identifier.
int LLLexer::LexIdentifier() {
  const char *StartChar = CurPtr;
  // IntEnd stays null while the token can still be an integer type, which is
  // only possible when the first character was 'i'.  Otherwise it is pinned
  // to StartChar, which the check below treats as "not an integer type".
  const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
  // KeywordEnd records the first character that cannot be part of a keyword
  // (anything other than a letter, digit or '_').
  const char *KeywordEnd = 0;

  for (; isLabelChar(*CurPtr); ++CurPtr) {
    // If we decide this is an integer, remember the end of the sequence.
    if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
    if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
  }

  // If we stopped due to a colon, this really is a label.
  if (*CurPtr == ':') {
    // StartChar-1 is the first character of the token; the ':' is consumed
    // but not included in the label text.
    llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++);
    return LABELSTR;
  }

  // Otherwise, this wasn't a label.  If this was valid as an integer type,
  // return it.
  if (IntEnd == 0) IntEnd = CurPtr;
  if (IntEnd != StartChar) {
    // Back up so that anything following the digits is lexed separately.
    CurPtr = IntEnd;
    uint64_t NumBits = atoull(StartChar, CurPtr);
    if (NumBits < IntegerType::MIN_INT_BITS ||
        NumBits > IntegerType::MAX_INT_BITS) {
      GenerateError("Bitwidth for integer type out of range!");
      return YYERROR;
    }
    const Type* Ty = IntegerType::get(NumBits);
    llvmAsmlval.PrimType = Ty;
    return INTTYPE;
  }

  // Otherwise, this was a letter sequence.  See which keyword this is.
  if (KeywordEnd == 0) KeywordEnd = CurPtr;
  CurPtr = KeywordEnd;
  --StartChar;      // Include the first character of the token again.
  unsigned Len = CurPtr-StartChar;
  // Each KEYWORD line returns TOK when the token text is exactly STR.
#define KEYWORD(STR, TOK) \
  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK;

  KEYWORD("begin",     BEGINTOK);
  KEYWORD("end",       ENDTOK);
  KEYWORD("true",      TRUETOK);
  KEYWORD("false",     FALSETOK);
  KEYWORD("declare",   DECLARE);
  KEYWORD("define",    DEFINE);
  KEYWORD("global",    GLOBAL);
  KEYWORD("constant",  CONSTANT);

  KEYWORD("internal",  INTERNAL);
  KEYWORD("linkonce",  LINKONCE);
  KEYWORD("weak",      WEAK);
  KEYWORD("appending", APPENDING);
  KEYWORD("dllimport", DLLIMPORT);
  KEYWORD("dllexport", DLLEXPORT);
  KEYWORD("hidden", HIDDEN);
  KEYWORD("protected", PROTECTED);
  KEYWORD("extern_weak", EXTERN_WEAK);
  KEYWORD("external", EXTERNAL);
  KEYWORD("thread_local", THREAD_LOCAL);
  KEYWORD("zeroinitializer", ZEROINITIALIZER);
  KEYWORD("undef", UNDEF);
  KEYWORD("null", NULL_TOK);
  KEYWORD("to", TO);
  KEYWORD("tail", TAIL);
  KEYWORD("target", TARGET);
  KEYWORD("triple", TRIPLE);
  KEYWORD("deplibs", DEPLIBS);
  KEYWORD("datalayout", DATALAYOUT);
  KEYWORD("volatile", VOLATILE);
  KEYWORD("align", ALIGN);
  KEYWORD("section", SECTION);
  KEYWORD("alias", ALIAS);
  KEYWORD("module", MODULE);
  KEYWORD("asm", ASM_TOK);
  KEYWORD("sideeffect", SIDEEFFECT);
  KEYWORD("gc", GC);

  KEYWORD("cc", CC_TOK);
  KEYWORD("ccc", CCC_TOK);
  KEYWORD("fastcc", FASTCC_TOK);
  KEYWORD("coldcc", COLDCC_TOK);
  KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK);
  KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK);

  KEYWORD("signext", SIGNEXT);
  KEYWORD("zeroext", ZEROEXT);
  KEYWORD("inreg", INREG);
  KEYWORD("sret", SRET);
  KEYWORD("nounwind", NOUNWIND);
  KEYWORD("noreturn", NORETURN);
  KEYWORD("noalias", NOALIAS);
  KEYWORD("byval", BYVAL);
  KEYWORD("nest", NEST);
  KEYWORD("readnone", READNONE);
  KEYWORD("readonly", READONLY);

  KEYWORD("type", TYPE);
  KEYWORD("opaque", OPAQUE);

  KEYWORD("eq" , EQ);
  KEYWORD("ne" , NE);
  KEYWORD("slt", SLT);
  KEYWORD("sgt", SGT);
  KEYWORD("sle", SLE);
  KEYWORD("sge", SGE);
  KEYWORD("ult", ULT);
  KEYWORD("ugt", UGT);
  KEYWORD("ule", ULE);
  KEYWORD("uge", UGE);
  KEYWORD("oeq", OEQ);
  KEYWORD("one", ONE);
  KEYWORD("olt", OLT);
  KEYWORD("ogt", OGT);
  KEYWORD("ole", OLE);
  KEYWORD("oge", OGE);
  KEYWORD("ord", ORD);
  KEYWORD("uno", UNO);
  KEYWORD("ueq", UEQ);
  KEYWORD("une", UNE);
#undef KEYWORD

  // Keywords for types.
  // Like KEYWORD, but also records the matching type in llvmAsmlval.PrimType.
#define TYPEKEYWORD(STR, LLVMTY, TOK) \
  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
    llvmAsmlval.PrimType = LLVMTY; return TOK; }
  TYPEKEYWORD("void",      Type::VoidTy,  VOID);
  TYPEKEYWORD("float",     Type::FloatTy, FLOAT);
  TYPEKEYWORD("double",    Type::DoubleTy, DOUBLE);
  TYPEKEYWORD("x86_fp80",  Type::X86_FP80Ty, X86_FP80);
  TYPEKEYWORD("fp128",     Type::FP128Ty, FP128);
  TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128);
  TYPEKEYWORD("label",     Type::LabelTy, LABEL);
#undef TYPEKEYWORD

  // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
  // to avoid conflicting with the sext/zext instructions, below.
  // "sext"/"zext" followed only by blanks up to the end of the line are
  // returned as the SIGNEXT/ZEROEXT tokens; otherwise they fall through to
  // the instruction keywords.
  if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
    if (JustWhitespaceNewLine(CurPtr))
      return SIGNEXT;
  } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
    if (JustWhitespaceNewLine(CurPtr))
      return ZEROEXT;
  }

  // Keywords for instructions.
  // Like KEYWORD, but also stores Instruction::Enum in the llvmAsmlval member
  // named by 'type'.
#define INSTKEYWORD(STR, type, Enum, TOK) \
  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
    llvmAsmlval.type = Instruction::Enum; return TOK; }

  INSTKEYWORD("add",     BinaryOpVal, Add, ADD);
  INSTKEYWORD("sub",     BinaryOpVal, Sub, SUB);
  INSTKEYWORD("mul",     BinaryOpVal, Mul, MUL);
  INSTKEYWORD("udiv",    BinaryOpVal, UDiv, UDIV);
  INSTKEYWORD("sdiv",    BinaryOpVal, SDiv, SDIV);
  INSTKEYWORD("fdiv",    BinaryOpVal, FDiv, FDIV);
  INSTKEYWORD("urem",    BinaryOpVal, URem, UREM);
  INSTKEYWORD("srem",    BinaryOpVal, SRem, SREM);
  INSTKEYWORD("frem",    BinaryOpVal, FRem, FREM);
  INSTKEYWORD("shl",     BinaryOpVal, Shl, SHL);
  INSTKEYWORD("lshr",    BinaryOpVal, LShr, LSHR);
  INSTKEYWORD("ashr",    BinaryOpVal, AShr, ASHR);
  INSTKEYWORD("and",     BinaryOpVal, And, AND);
  INSTKEYWORD("or",      BinaryOpVal, Or , OR );
  INSTKEYWORD("xor",     BinaryOpVal, Xor, XOR);
  INSTKEYWORD("icmp",    OtherOpVal,  ICmp,  ICMP);
  INSTKEYWORD("fcmp",    OtherOpVal,  FCmp,  FCMP);

  INSTKEYWORD("phi",         OtherOpVal, PHI, PHI_TOK);
  INSTKEYWORD("call",        OtherOpVal, Call, CALL);
  INSTKEYWORD("trunc",       CastOpVal, Trunc, TRUNC);
  INSTKEYWORD("zext",        CastOpVal, ZExt, ZEXT);
  INSTKEYWORD("sext",        CastOpVal, SExt, SEXT);
  INSTKEYWORD("fptrunc",     CastOpVal, FPTrunc, FPTRUNC);
  INSTKEYWORD("fpext",       CastOpVal, FPExt, FPEXT);
  INSTKEYWORD("uitofp",      CastOpVal, UIToFP, UITOFP);
  INSTKEYWORD("sitofp",      CastOpVal, SIToFP, SITOFP);
  INSTKEYWORD("fptoui",      CastOpVal, FPToUI, FPTOUI);
  INSTKEYWORD("fptosi",      CastOpVal, FPToSI, FPTOSI);
  INSTKEYWORD("inttoptr",    CastOpVal, IntToPtr, INTTOPTR);
  INSTKEYWORD("ptrtoint",    CastOpVal, PtrToInt, PTRTOINT);
  INSTKEYWORD("bitcast",     CastOpVal, BitCast, BITCAST);
  INSTKEYWORD("select",      OtherOpVal, Select, SELECT);
  INSTKEYWORD("va_arg",      OtherOpVal, VAArg , VAARG);
  INSTKEYWORD("ret",         TermOpVal, Ret, RET);
  INSTKEYWORD("br",          TermOpVal, Br, BR);
  INSTKEYWORD("switch",      TermOpVal, Switch, SWITCH);
  INSTKEYWORD("invoke",      TermOpVal, Invoke, INVOKE);
  INSTKEYWORD("unwind",      TermOpVal, Unwind, UNWIND);
  INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE);

  INSTKEYWORD("malloc",      MemOpVal, Malloc, MALLOC);
  INSTKEYWORD("alloca",      MemOpVal, Alloca, ALLOCA);
  INSTKEYWORD("free",        MemOpVal, Free, FREE);
  INSTKEYWORD("load",        MemOpVal, Load, LOAD);
  INSTKEYWORD("store",       MemOpVal, Store, STORE);
  INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR);

  INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT);
  INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT);
  INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR);
#undef INSTKEYWORD

  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
  // the CFE to avoid forcing it to deal with 64-bit numbers.
  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
      TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
    // Parse the digits after the "u0x"/"s0x" prefix at four bits per digit,
    // then shrink the value to the bits it actually uses.
    int len = CurPtr-TokStart-3;
    uint32_t bits = len * 4;
    APInt Tmp(bits, TokStart+3, len, 16);
    uint32_t activeBits = Tmp.getActiveBits();
    if (activeBits > 0 && activeBits < bits)
      Tmp.trunc(activeBits);
    // Values wider than 64 bits are returned as a heap-allocated APInt; the
    // rest go into the plain 64-bit fields.
    if (Tmp.getBitWidth() > 64) {
      llvmAsmlval.APIntVal = new APInt(Tmp);
      return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL;
    } else if (TokStart[0] == 's') {
      llvmAsmlval.SInt64Val = Tmp.getSExtValue();
      return ESINT64VAL;
    } else {
      llvmAsmlval.UInt64Val = Tmp.getZExtValue();
      return EUINT64VAL;
    }
  }

  // If this is "cc1234", return this as just "cc".
  // Only the first two characters are checked; the rest of the token is
  // re-lexed starting right after "cc".
  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
    CurPtr = TokStart+2;
    return CC_TOK;
  }

  // If this starts with "call", return it as CALL.  This is to support old
  // broken .ll files.  FIXME: remove this with LLVM 3.0.
  if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
    CurPtr = TokStart+4;
    llvmAsmlval.OtherOpVal = Instruction::Call;
    return CALL;
  }

  // Finally, if this isn't known, return just a single character.
  CurPtr = TokStart+1;
  return TokStart[0];
}


/// Lex0x: Handle productions that start with 0x, knowing that it matches and
/// that this is not a label:
///    HexFPConstant     0x[0-9A-Fa-f]+
///    HexFP80Constant   0xK[0-9A-Fa-f]+
///    HexFP128Constant  0xL[0-9A-Fa-f]+
///    HexPPC128Constant 0xM[0-9A-Fa-f]+
int LLLexer::Lex0x() {
  CurPtr = TokStart + 2;

  // Kind is the letter following "0x" ('K', 'L' or 'M'); 'J' stands for the
  // plain form without a letter.
  char Kind;
  if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
    Kind = *CurPtr++;
  } else {
    Kind = 'J';
  }

  if (!isxdigit(CurPtr[0])) {
    // Bad token, return it as just zero.
    CurPtr = TokStart+1;
    return '0';
  }

  while (isxdigit(CurPtr[0]))
    ++CurPtr;

  if (Kind == 'J') {
    // HexFPConstant - Floating point constant represented in IEEE format as a
    // hexadecimal number for when exponential notation is not precise enough.
    // Float and double only.
    llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr));
    return FPVAL;
  }

  // The remaining kinds carry more than 64 bits; the digits start after the
  // three-character "0x?" prefix.
  uint64_t Pair[2];
  HexToIntPair(TokStart+3, CurPtr, Pair);
  switch (Kind) {
  default: assert(0 && "Unknown kind!");
  case 'K':
    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
    llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair));
    return FPVAL;
  case 'L':
    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
    llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true);
    return FPVAL;
  case 'M':
    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
    llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair));
    return FPVAL;
  }
}

/// LexDigitOrNegative: Handle several related productions:
///    Label             [-a-zA-Z$._0-9]+:
///    NInteger          -[0-9]+
///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
///    PInteger          [0-9]+
///    HexFPConstant     0x[0-9A-Fa-f]+
///    HexFP80Constant   0xK[0-9A-Fa-f]+
///    HexFP128Constant  0xL[0-9A-Fa-f]+
///    HexPPC128Constant 0xM[0-9A-Fa-f]+
/// Called with CurPtr just past the leading digit or '-'.
int LLLexer::LexDigitOrNegative() {
  // If the token starts with '-' and the next character is not a digit, this
  // cannot be a number; it is probably a label.
  if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
    // Okay, this is not a number after the -, it's probably a label.
    if (const char *End = isLabelTail(CurPtr)) {
      llvmAsmlval.StrVal = new std::string(TokStart, End-1);
      CurPtr = End;
      return LABELSTR;
    }

    // Not a label either: return the lone character.
    return CurPtr[-1];
  }

  // At this point, it is either a label, int or fp constant.

  // Skip digits, we have at least one.
  for (; isdigit(CurPtr[0]); ++CurPtr);

  // Check to see if this really is a label afterall, e.g. "-1:".
  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
    if (const char *End = isLabelTail(CurPtr)) {
      llvmAsmlval.StrVal = new std::string(TokStart, End-1);
      CurPtr = End;
      return LABELSTR;
    }
  }

  // If the next character is a '.', then it is a fp value, otherwise its
  // integer.
  if (CurPtr[0] != '.') {
    if (TokStart[0] == '0' && TokStart[1] == 'x')
      return Lex0x();
    // Parse the decimal text into an APInt sized generously from the number
    // of characters, then shrink it to the bits the value needs.
    unsigned Len = CurPtr-TokStart;
    uint32_t numBits = ((Len * 64) / 19) + 2;
    APInt Tmp(numBits, TokStart, Len, 10);
    if (TokStart[0] == '-') {
      uint32_t minBits = Tmp.getMinSignedBits();
      if (minBits > 0 && minBits < numBits)
        Tmp.trunc(minBits);
      if (Tmp.getBitWidth() > 64) {
        llvmAsmlval.APIntVal = new APInt(Tmp);
        return ESAPINTVAL;
      } else {
        llvmAsmlval.SInt64Val = Tmp.getSExtValue();
        return ESINT64VAL;
      }
    } else {
      uint32_t activeBits = Tmp.getActiveBits();
      if (activeBits > 0 && activeBits < numBits)
        Tmp.trunc(activeBits);
      if (Tmp.getBitWidth() > 64) {
        llvmAsmlval.APIntVal = new APInt(Tmp);
        return EUAPINTVAL;
      } else {
        llvmAsmlval.UInt64Val = Tmp.getZExtValue();
        return EUINT64VAL;
      }
    }
  }

  // Consume the '.'.
  ++CurPtr;

  // Skip over [0-9]*([eE][-+]?[0-9]+)?
  while (isdigit(CurPtr[0])) ++CurPtr;

  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
    // Only accept the exponent if at least one digit follows the optional
    // sign; the first two characters are known to belong to it.
    if (isdigit(CurPtr[1]) ||
        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
      CurPtr += 2;
      while (isdigit(CurPtr[0])) ++CurPtr;
    }
  }

  // The value itself is converted by atof starting at the token start.
  llvmAsmlval.FPVal = new APFloat(atof(TokStart));
  return FPVAL;
}

///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
/// LexPositive - Lex a token starting with '+'.  Only a floating point
/// constant is recognized; anything else yields the '+' character itself.
int LLLexer::LexPositive() {
  // If the character after the '+' is not a digit, this is not a number:
  // return the '+' by itself.
  if (!isdigit(CurPtr[0]))
    return CurPtr[-1];

  // Skip digits.
  for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);

  // At this point, we need a '.'.
  if (CurPtr[0] != '.') {
    // No '.': back up and return just the first character of the token.
    CurPtr = TokStart+1;
    return TokStart[0];
  }

  ++CurPtr;

  // Skip over [0-9]*([eE][-+]?[0-9]+)?
  while (isdigit(CurPtr[0])) ++CurPtr;

  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
    // Only accept the exponent if at least one digit follows the optional
    // sign; the first two characters are known to belong to it.
    if (isdigit(CurPtr[1]) ||
        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
      CurPtr += 2;
      while (isdigit(CurPtr[0])) ++CurPtr;
    }
  }

  // The value itself is converted by atof starting at the token start.
  llvmAsmlval.FPVal = new APFloat(atof(TokStart));
  return FPVAL;
}


//===----------------------------------------------------------------------===//
// Define the interface to this file.
//===----------------------------------------------------------------------===//

// The single lexer instance behind the free functions below.  It is created
// by InitLLLexer and destroyed by FreeLexer.
static LLLexer *TheLexer;

/// InitLLLexer - Create the lexer for MB.  Only one lexer may exist at a time.
void InitLLLexer(llvm::MemoryBuffer *MB) {
  assert(TheLexer == 0 && "LL Lexer isn't reentrant yet");
  TheLexer = new LLLexer(MB);
}

/// llvmAsmlex - Return the next token from the current lexer.
int llvmAsmlex() {
  return TheLexer->LexToken();
}
// Accessors forwarding to the current lexer.
const char *LLLgetTokenStart() { return TheLexer->getTokStart(); }
unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); }
std::string LLLgetFilename() { return TheLexer->getFilename(); }
unsigned LLLgetLineNo() { return TheLexer->getLineNo(); }

/// FreeLexer - Destroy the current lexer so that InitLLLexer can be called
/// again.
void FreeLexer() {
  delete TheLexer;
  TheLexer = 0;
}