LLLexer.cpp revision 73ddd4f00dd2a4b7b68a1500bc7e3322cab51270
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "ParserInternals.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Support/MathExtras.h" 18 19#include <list> 20#include "llvmAsmParser.h" 21 22#include <cstring> 23using namespace llvm; 24 25//===----------------------------------------------------------------------===// 26// Helper functions. 27//===----------------------------------------------------------------------===// 28 29// atoull - Convert an ascii string of decimal digits into the unsigned long 30// long representation... this does not have to do input error checking, 31// because we know that the input will be matched by a suitable regex... 32// 33static uint64_t atoull(const char *Buffer, const char *End) { 34 uint64_t Result = 0; 35 for (; Buffer != End; Buffer++) { 36 uint64_t OldRes = Result; 37 Result *= 10; 38 Result += *Buffer-'0'; 39 if (Result < OldRes) { // Uh, oh, overflow detected!!! 40 GenerateError("constant bigger than 64 bits detected!"); 41 return 0; 42 } 43 } 44 return Result; 45} 46 47static uint64_t HexIntToVal(const char *Buffer, const char *End) { 48 uint64_t Result = 0; 49 for (; Buffer != End; ++Buffer) { 50 uint64_t OldRes = Result; 51 Result *= 16; 52 char C = *Buffer; 53 if (C >= '0' && C <= '9') 54 Result += C-'0'; 55 else if (C >= 'A' && C <= 'F') 56 Result += C-'A'+10; 57 else if (C >= 'a' && C <= 'f') 58 Result += C-'a'+10; 59 60 if (Result < OldRes) { // Uh, oh, overflow detected!!! 61 GenerateError("constant bigger than 64 bits detected!"); 62 return 0; 63 } 64 } 65 return Result; 66} 67 68// HexToFP - Convert the ascii string in hexadecimal format to the floating 69// point representation of it. 70// 71static double HexToFP(const char *Buffer, const char *End) { 72 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double 73} 74 75static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ 76 Pair[0] = 0; 77 for (int i=0; i<16; i++, Buffer++) { 78 assert(Buffer != End); 79 Pair[0] *= 16; 80 char C = *Buffer; 81 if (C >= '0' && C <= '9') 82 Pair[0] += C-'0'; 83 else if (C >= 'A' && C <= 'F') 84 Pair[0] += C-'A'+10; 85 else if (C >= 'a' && C <= 'f') 86 Pair[0] += C-'a'+10; 87 } 88 Pair[1] = 0; 89 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 90 Pair[1] *= 16; 91 char C = *Buffer; 92 if (C >= '0' && C <= '9') 93 Pair[1] += C-'0'; 94 else if (C >= 'A' && C <= 'F') 95 Pair[1] += C-'A'+10; 96 else if (C >= 'a' && C <= 'f') 97 Pair[1] += C-'a'+10; 98 } 99 if (Buffer != End) 100 GenerateError("constant bigger than 128 bits detected!"); 101} 102 103// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 104// appropriate character. 105static void UnEscapeLexed(std::string &Str) { 106 if (Str.empty()) return; 107 108 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 109 char *BOut = Buffer; 110 for (char *BIn = Buffer; BIn != EndBuffer; ) { 111 if (BIn[0] == '\\') { 112 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 113 *BOut++ = '\\'; // Two \ becomes one 114 BIn += 2; 115 } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { 116 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string 117 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number 118 BIn[3] = Tmp; // Restore character 119 BIn += 3; // Skip over handled chars 120 ++BOut; 121 } else { 122 *BOut++ = *BIn++; 123 } 124 } else { 125 *BOut++ = *BIn++; 126 } 127 } 128 Str.resize(BOut-Buffer); 129} 130 131/// isLabelChar - Return true for [-a-zA-Z$._0-9]. 132static bool isLabelChar(char C) { 133 return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; 134} 135 136 137/// isLabelTail - Return true if this pointer points to a valid end of a label. 138static const char *isLabelTail(const char *CurPtr) { 139 while (1) { 140 if (CurPtr[0] == ':') return CurPtr+1; 141 if (!isLabelChar(CurPtr[0])) return 0; 142 ++CurPtr; 143 } 144} 145 146 147 148//===----------------------------------------------------------------------===// 149// Lexer definition. 150//===----------------------------------------------------------------------===// 151 152// FIXME: REMOVE THIS. 153#define YYEOF 0 154#define YYERROR -2 155 156LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { 157 CurPtr = CurBuf->getBufferStart(); 158} 159 160std::string LLLexer::getFilename() const { 161 return CurBuf->getBufferIdentifier(); 162} 163 164int LLLexer::getNextChar() { 165 char CurChar = *CurPtr++; 166 switch (CurChar) { 167 default: return (unsigned char)CurChar; 168 case 0: 169 // A nul character in the stream is either the end of the current buffer or 170 // a random nul in the file. Disambiguate that here. 171 if (CurPtr-1 != CurBuf->getBufferEnd()) 172 return 0; // Just whitespace. 173 174 // Otherwise, return end of file. 175 --CurPtr; // Another call to lex will return EOF again. 176 return EOF; 177 case '\n': 178 case '\r': 179 // Handle the newline character by ignoring it and incrementing the line 180 // count. However, be careful about 'dos style' files with \n\r in them. 181 // Only treat a \n\r or \r\n as a single line. 182 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 183 *CurPtr != CurChar) 184 ++CurPtr; // Eat the two char newline sequence. 185 186 ++CurLineNo; 187 return '\n'; 188 } 189} 190 191 192int LLLexer::LexToken() { 193 TokStart = CurPtr; 194 195 int CurChar = getNextChar(); 196 197 switch (CurChar) { 198 default: 199 // Handle letters: [a-zA-Z_] 200 if (isalpha(CurChar) || CurChar == '_') 201 return LexIdentifier(); 202 203 return CurChar; 204 case EOF: return YYEOF; 205 case 0: 206 case ' ': 207 case '\t': 208 case '\n': 209 case '\r': 210 // Ignore whitespace. 211 return LexToken(); 212 case '+': return LexPositive(); 213 case '@': return LexAt(); 214 case '%': return LexPercent(); 215 case '"': return LexQuote(); 216 case '.': 217 if (const char *Ptr = isLabelTail(CurPtr)) { 218 CurPtr = Ptr; 219 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 220 return LABELSTR; 221 } 222 if (CurPtr[0] == '.' && CurPtr[1] == '.') { 223 CurPtr += 2; 224 return DOTDOTDOT; 225 } 226 return '.'; 227 case '$': 228 if (const char *Ptr = isLabelTail(CurPtr)) { 229 CurPtr = Ptr; 230 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 231 return LABELSTR; 232 } 233 return '$'; 234 case ';': 235 SkipLineComment(); 236 return LexToken(); 237 case '0': case '1': case '2': case '3': case '4': 238 case '5': case '6': case '7': case '8': case '9': 239 case '-': 240 return LexDigitOrNegative(); 241 } 242} 243 244void LLLexer::SkipLineComment() { 245 while (1) { 246 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 247 return; 248 } 249} 250 251/// LexAt - Lex all tokens that start with an @ character: 252/// AtStringConstant @\"[^\"]*\" 253/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* 254/// GlobalVarID @[0-9]+ 255int LLLexer::LexAt() { 256 // Handle AtStringConstant: @\"[^\"]*\" 257 if (CurPtr[0] == '"') { 258 ++CurPtr; 259 260 while (1) { 261 int CurChar = getNextChar(); 262 263 if (CurChar == EOF) { 264 GenerateError("End of file in global variable name"); 265 return YYERROR; 266 } 267 if (CurChar == '"') { 268 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 269 UnEscapeLexed(*llvmAsmlval.StrVal); 270 return ATSTRINGCONSTANT; 271 } 272 } 273 } 274 275 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 276 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 277 CurPtr[0] == '.' || CurPtr[0] == '_') { 278 ++CurPtr; 279 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 280 CurPtr[0] == '.' || CurPtr[0] == '_') 281 ++CurPtr; 282 283 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ 284 return GLOBALVAR; 285 } 286 287 // Handle GlobalVarID: @[0-9]+ 288 if (isdigit(CurPtr[0])) { 289 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 290 /*empty*/; 291 292 uint64_t Val = atoull(TokStart+1, CurPtr); 293 if ((unsigned)Val != Val) 294 GenerateError("Invalid value number (too large)!"); 295 llvmAsmlval.UIntVal = unsigned(Val); 296 return GLOBALVAL_ID; 297 } 298 299 return '@'; 300} 301 302 303/// LexPercent - Lex all tokens that start with a % character: 304/// PctStringConstant %\"[^\"]*\" 305/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* 306/// LocalVarID %[0-9]+ 307int LLLexer::LexPercent() { 308 // Handle PctStringConstant: %\"[^\"]*\" 309 if (CurPtr[0] == '"') { 310 ++CurPtr; 311 312 while (1) { 313 int CurChar = getNextChar(); 314 315 if (CurChar == EOF) { 316 GenerateError("End of file in local variable name"); 317 return YYERROR; 318 } 319 if (CurChar == '"') { 320 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 321 UnEscapeLexed(*llvmAsmlval.StrVal); 322 return PCTSTRINGCONSTANT; 323 } 324 } 325 } 326 327 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 328 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 329 CurPtr[0] == '.' || CurPtr[0] == '_') { 330 ++CurPtr; 331 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 332 CurPtr[0] == '.' || CurPtr[0] == '_') 333 ++CurPtr; 334 335 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % 336 return LOCALVAR; 337 } 338 339 // Handle LocalVarID: %[0-9]+ 340 if (isdigit(CurPtr[0])) { 341 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 342 /*empty*/; 343 344 uint64_t Val = atoull(TokStart+1, CurPtr); 345 if ((unsigned)Val != Val) 346 GenerateError("Invalid value number (too large)!"); 347 llvmAsmlval.UIntVal = unsigned(Val); 348 return LOCALVAL_ID; 349 } 350 351 return '%'; 352} 353 354/// LexQuote - Lex all tokens that start with a " character: 355/// QuoteLabel "[^"]+": 356/// StringConstant "[^"]*" 357int LLLexer::LexQuote() { 358 while (1) { 359 int CurChar = getNextChar(); 360 361 if (CurChar == EOF) { 362 GenerateError("End of file in quoted string"); 363 return YYERROR; 364 } 365 366 if (CurChar != '"') continue; 367 368 if (CurPtr[0] != ':') { 369 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); 370 UnEscapeLexed(*llvmAsmlval.StrVal); 371 return STRINGCONSTANT; 372 } 373 374 ++CurPtr; 375 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); 376 UnEscapeLexed(*llvmAsmlval.StrVal); 377 return LABELSTR; 378 } 379} 380 381static bool JustWhitespaceNewLine(const char *&Ptr) { 382 const char *ThisPtr = Ptr; 383 while (*ThisPtr == ' ' || *ThisPtr == '\t') 384 ++ThisPtr; 385 if (*ThisPtr == '\n' || *ThisPtr == '\r') { 386 Ptr = ThisPtr; 387 return true; 388 } 389 return false; 390} 391 392 393/// LexIdentifier: Handle several related productions: 394/// Label [-a-zA-Z$._0-9]+: 395/// IntegerType i[0-9]+ 396/// Keyword sdiv, float, ... 397/// HexIntConstant [us]0x[0-9A-Fa-f]+ 398int LLLexer::LexIdentifier() { 399 const char *StartChar = CurPtr; 400 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 401 const char *KeywordEnd = 0; 402 403 for (; isLabelChar(*CurPtr); ++CurPtr) { 404 // If we decide this is an integer, remember the end of the sequence. 405 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; 406 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; 407 } 408 409 // If we stopped due to a colon, this really is a label. 410 if (*CurPtr == ':') { 411 llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); 412 return LABELSTR; 413 } 414 415 // Otherwise, this wasn't a label. If this was valid as an integer type, 416 // return it. 417 if (IntEnd == 0) IntEnd = CurPtr; 418 if (IntEnd != StartChar) { 419 CurPtr = IntEnd; 420 uint64_t NumBits = atoull(StartChar, CurPtr); 421 if (NumBits < IntegerType::MIN_INT_BITS || 422 NumBits > IntegerType::MAX_INT_BITS) { 423 GenerateError("Bitwidth for integer type out of range!"); 424 return YYERROR; 425 } 426 const Type* Ty = IntegerType::get(NumBits); 427 llvmAsmlval.PrimType = Ty; 428 return INTTYPE; 429 } 430 431 // Otherwise, this was a letter sequence. See which keyword this is. 432 if (KeywordEnd == 0) KeywordEnd = CurPtr; 433 CurPtr = KeywordEnd; 434 --StartChar; 435 unsigned Len = CurPtr-StartChar; 436#define KEYWORD(STR, TOK) \ 437 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; 438 439 KEYWORD("begin", BEGINTOK); 440 KEYWORD("end", ENDTOK); 441 KEYWORD("true", TRUETOK); 442 KEYWORD("false", FALSETOK); 443 KEYWORD("declare", DECLARE); 444 KEYWORD("define", DEFINE); 445 KEYWORD("global", GLOBAL); 446 KEYWORD("constant", CONSTANT); 447 448 KEYWORD("internal", INTERNAL); 449 KEYWORD("linkonce", LINKONCE); 450 KEYWORD("weak", WEAK); 451 KEYWORD("appending", APPENDING); 452 KEYWORD("dllimport", DLLIMPORT); 453 KEYWORD("dllexport", DLLEXPORT); 454 KEYWORD("common", COMMON); 455 KEYWORD("default", DEFAULT); 456 KEYWORD("hidden", HIDDEN); 457 KEYWORD("protected", PROTECTED); 458 KEYWORD("extern_weak", EXTERN_WEAK); 459 KEYWORD("external", EXTERNAL); 460 KEYWORD("thread_local", THREAD_LOCAL); 461 KEYWORD("zeroinitializer", ZEROINITIALIZER); 462 KEYWORD("undef", UNDEF); 463 KEYWORD("null", NULL_TOK); 464 KEYWORD("to", TO); 465 KEYWORD("tail", TAIL); 466 KEYWORD("target", TARGET); 467 KEYWORD("triple", TRIPLE); 468 KEYWORD("deplibs", DEPLIBS); 469 KEYWORD("datalayout", DATALAYOUT); 470 KEYWORD("volatile", VOLATILE); 471 KEYWORD("align", ALIGN); 472 KEYWORD("addrspace", ADDRSPACE); 473 KEYWORD("section", SECTION); 474 KEYWORD("alias", ALIAS); 475 KEYWORD("module", MODULE); 476 KEYWORD("asm", ASM_TOK); 477 KEYWORD("sideeffect", SIDEEFFECT); 478 KEYWORD("gc", GC); 479 480 KEYWORD("cc", CC_TOK); 481 KEYWORD("ccc", CCC_TOK); 482 KEYWORD("fastcc", FASTCC_TOK); 483 KEYWORD("coldcc", COLDCC_TOK); 484 KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); 485 KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); 486 487 KEYWORD("signext", SIGNEXT); 488 KEYWORD("zeroext", ZEROEXT); 489 KEYWORD("inreg", INREG); 490 KEYWORD("sret", SRET); 491 KEYWORD("nounwind", NOUNWIND); 492 KEYWORD("noreturn", NORETURN); 493 KEYWORD("noalias", NOALIAS); 494 KEYWORD("nocapture", NOCAPTURE); 495 KEYWORD("byval", BYVAL); 496 KEYWORD("nest", NEST); 497 KEYWORD("readnone", READNONE); 498 KEYWORD("readonly", READONLY); 499 500 KEYWORD("noinline", NOINLINE); 501 KEYWORD("alwaysinline", ALWAYSINLINE); 502 KEYWORD("optsize", OPTSIZE); 503 KEYWORD("ssp", SSP); 504 KEYWORD("sspreq", SSPREQ); 505 506 KEYWORD("type", TYPE); 507 KEYWORD("opaque", OPAQUE); 508 509 KEYWORD("eq" , EQ); 510 KEYWORD("ne" , NE); 511 KEYWORD("slt", SLT); 512 KEYWORD("sgt", SGT); 513 KEYWORD("sle", SLE); 514 KEYWORD("sge", SGE); 515 KEYWORD("ult", ULT); 516 KEYWORD("ugt", UGT); 517 KEYWORD("ule", ULE); 518 KEYWORD("uge", UGE); 519 KEYWORD("oeq", OEQ); 520 KEYWORD("one", ONE); 521 KEYWORD("olt", OLT); 522 KEYWORD("ogt", OGT); 523 KEYWORD("ole", OLE); 524 KEYWORD("oge", OGE); 525 KEYWORD("ord", ORD); 526 KEYWORD("uno", UNO); 527 KEYWORD("ueq", UEQ); 528 KEYWORD("une", UNE); 529#undef KEYWORD 530 531 // Keywords for types. 532#define TYPEKEYWORD(STR, LLVMTY, TOK) \ 533 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 534 llvmAsmlval.PrimType = LLVMTY; return TOK; } 535 TYPEKEYWORD("void", Type::VoidTy, VOID); 536 TYPEKEYWORD("float", Type::FloatTy, FLOAT); 537 TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); 538 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); 539 TYPEKEYWORD("fp128", Type::FP128Ty, FP128); 540 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); 541 TYPEKEYWORD("label", Type::LabelTy, LABEL); 542#undef TYPEKEYWORD 543 544 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is 545 // to avoid conflicting with the sext/zext instructions, below. 546 if (Len == 4 && !memcmp(StartChar, "sext", 4)) { 547 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 548 if (JustWhitespaceNewLine(CurPtr)) 549 return SIGNEXT; 550 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { 551 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 552 if (JustWhitespaceNewLine(CurPtr)) 553 return ZEROEXT; 554 } 555 556 // Keywords for instructions. 557#define INSTKEYWORD(STR, type, Enum, TOK) \ 558 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 559 llvmAsmlval.type = Instruction::Enum; return TOK; } 560 561 INSTKEYWORD("add", BinaryOpVal, Add, ADD); 562 INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); 563 INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); 564 INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); 565 INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); 566 INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); 567 INSTKEYWORD("urem", BinaryOpVal, URem, UREM); 568 INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); 569 INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); 570 INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); 571 INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); 572 INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); 573 INSTKEYWORD("and", BinaryOpVal, And, AND); 574 INSTKEYWORD("or", BinaryOpVal, Or , OR ); 575 INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); 576 INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); 577 INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); 578 INSTKEYWORD("vicmp", OtherOpVal, VICmp, VICMP); 579 INSTKEYWORD("vfcmp", OtherOpVal, VFCmp, VFCMP); 580 581 INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); 582 INSTKEYWORD("call", OtherOpVal, Call, CALL); 583 INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); 584 INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); 585 INSTKEYWORD("sext", CastOpVal, SExt, SEXT); 586 INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); 587 INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); 588 INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); 589 INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); 590 INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); 591 INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); 592 INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); 593 INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); 594 INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); 595 INSTKEYWORD("select", OtherOpVal, Select, SELECT); 596 INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); 597 INSTKEYWORD("ret", TermOpVal, Ret, RET); 598 INSTKEYWORD("br", TermOpVal, Br, BR); 599 INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); 600 INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); 601 INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); 602 INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); 603 604 INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); 605 INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); 606 INSTKEYWORD("free", MemOpVal, Free, FREE); 607 INSTKEYWORD("load", MemOpVal, Load, LOAD); 608 INSTKEYWORD("store", MemOpVal, Store, STORE); 609 INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); 610 611 INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); 612 INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); 613 INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); 614 INSTKEYWORD("getresult", OtherOpVal, ExtractValue, GETRESULT); 615 INSTKEYWORD("extractvalue", OtherOpVal, ExtractValue, EXTRACTVALUE); 616 INSTKEYWORD("insertvalue", OtherOpVal, InsertValue, INSERTVALUE); 617#undef INSTKEYWORD 618 619 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 620 // the CFE to avoid forcing it to deal with 64-bit numbers. 621 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 622 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { 623 int len = CurPtr-TokStart-3; 624 uint32_t bits = len * 4; 625 APInt Tmp(bits, TokStart+3, len, 16); 626 uint32_t activeBits = Tmp.getActiveBits(); 627 if (activeBits > 0 && activeBits < bits) 628 Tmp.trunc(activeBits); 629 if (Tmp.getBitWidth() > 64) { 630 llvmAsmlval.APIntVal = new APInt(Tmp); 631 return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; 632 } else if (TokStart[0] == 's') { 633 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 634 return ESINT64VAL; 635 } else { 636 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 637 return EUINT64VAL; 638 } 639 } 640 641 // If this is "cc1234", return this as just "cc". 642 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 643 CurPtr = TokStart+2; 644 return CC_TOK; 645 } 646 647 // If this starts with "call", return it as CALL. This is to support old 648 // broken .ll files. FIXME: remove this with LLVM 3.0. 649 if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { 650 CurPtr = TokStart+4; 651 llvmAsmlval.OtherOpVal = Instruction::Call; 652 return CALL; 653 } 654 655 // Finally, if this isn't known, return just a single character. 656 CurPtr = TokStart+1; 657 return TokStart[0]; 658} 659 660 661/// Lex0x: Handle productions that start with 0x, knowing that it matches and 662/// that this is not a label: 663/// HexFPConstant 0x[0-9A-Fa-f]+ 664/// HexFP80Constant 0xK[0-9A-Fa-f]+ 665/// HexFP128Constant 0xL[0-9A-Fa-f]+ 666/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 667int LLLexer::Lex0x() { 668 CurPtr = TokStart + 2; 669 670 char Kind; 671 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { 672 Kind = *CurPtr++; 673 } else { 674 Kind = 'J'; 675 } 676 677 if (!isxdigit(CurPtr[0])) { 678 // Bad token, return it as just zero. 679 CurPtr = TokStart+1; 680 return '0'; 681 } 682 683 while (isxdigit(CurPtr[0])) 684 ++CurPtr; 685 686 if (Kind == 'J') { 687 // HexFPConstant - Floating point constant represented in IEEE format as a 688 // hexadecimal number for when exponential notation is not precise enough. 689 // Float and double only. 690 llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); 691 return FPVAL; 692 } 693 694 uint64_t Pair[2]; 695 HexToIntPair(TokStart+3, CurPtr, Pair); 696 switch (Kind) { 697 default: assert(0 && "Unknown kind!"); 698 case 'K': 699 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 700 llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); 701 return FPVAL; 702 case 'L': 703 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 704 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); 705 return FPVAL; 706 case 'M': 707 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 708 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); 709 return FPVAL; 710 } 711} 712 713/// LexIdentifier: Handle several related productions: 714/// Label [-a-zA-Z$._0-9]+: 715/// NInteger -[0-9]+ 716/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 717/// PInteger [0-9]+ 718/// HexFPConstant 0x[0-9A-Fa-f]+ 719/// HexFP80Constant 0xK[0-9A-Fa-f]+ 720/// HexFP128Constant 0xL[0-9A-Fa-f]+ 721/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 722int LLLexer::LexDigitOrNegative() { 723 // If the letter after the negative is a number, this is probably a label. 724 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { 725 // Okay, this is not a number after the -, it's probably a label. 726 if (const char *End = isLabelTail(CurPtr)) { 727 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 728 CurPtr = End; 729 return LABELSTR; 730 } 731 732 return CurPtr[-1]; 733 } 734 735 // At this point, it is either a label, int or fp constant. 736 737 // Skip digits, we have at least one. 738 for (; isdigit(CurPtr[0]); ++CurPtr) 739 /*empty*/; 740 741 // Check to see if this really is a label afterall, e.g. "-1:". 742 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 743 if (const char *End = isLabelTail(CurPtr)) { 744 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 745 CurPtr = End; 746 return LABELSTR; 747 } 748 } 749 750 // If the next character is a '.', then it is a fp value, otherwise its 751 // integer. 752 if (CurPtr[0] != '.') { 753 if (TokStart[0] == '0' && TokStart[1] == 'x') 754 return Lex0x(); 755 unsigned Len = CurPtr-TokStart; 756 uint32_t numBits = ((Len * 64) / 19) + 2; 757 APInt Tmp(numBits, TokStart, Len, 10); 758 if (TokStart[0] == '-') { 759 uint32_t minBits = Tmp.getMinSignedBits(); 760 if (minBits > 0 && minBits < numBits) 761 Tmp.trunc(minBits); 762 if (Tmp.getBitWidth() > 64) { 763 llvmAsmlval.APIntVal = new APInt(Tmp); 764 return ESAPINTVAL; 765 } else { 766 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 767 return ESINT64VAL; 768 } 769 } else { 770 uint32_t activeBits = Tmp.getActiveBits(); 771 if (activeBits > 0 && activeBits < numBits) 772 Tmp.trunc(activeBits); 773 if (Tmp.getBitWidth() > 64) { 774 llvmAsmlval.APIntVal = new APInt(Tmp); 775 return EUAPINTVAL; 776 } else { 777 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 778 return EUINT64VAL; 779 } 780 } 781 } 782 783 ++CurPtr; 784 785 // Skip over [0-9]*([eE][-+]?[0-9]+)? 786 while (isdigit(CurPtr[0])) ++CurPtr; 787 788 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 789 if (isdigit(CurPtr[1]) || 790 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 791 CurPtr += 2; 792 while (isdigit(CurPtr[0])) ++CurPtr; 793 } 794 } 795 796 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 797 return FPVAL; 798} 799 800/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 801int LLLexer::LexPositive() { 802 // If the letter after the negative is a number, this is probably not a 803 // label. 804 if (!isdigit(CurPtr[0])) 805 return CurPtr[-1]; 806 807 // Skip digits. 808 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 809 /*empty*/; 810 811 // At this point, we need a '.'. 812 if (CurPtr[0] != '.') { 813 CurPtr = TokStart+1; 814 return TokStart[0]; 815 } 816 817 ++CurPtr; 818 819 // Skip over [0-9]*([eE][-+]?[0-9]+)? 820 while (isdigit(CurPtr[0])) ++CurPtr; 821 822 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 823 if (isdigit(CurPtr[1]) || 824 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 825 CurPtr += 2; 826 while (isdigit(CurPtr[0])) ++CurPtr; 827 } 828 } 829 830 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 831 return FPVAL; 832} 833 834 835//===----------------------------------------------------------------------===// 836// Define the interface to this file. 837//===----------------------------------------------------------------------===// 838 839static LLLexer *TheLexer; 840 841void InitLLLexer(llvm::MemoryBuffer *MB) { 842 assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); 843 TheLexer = new LLLexer(MB); 844} 845 846int llvmAsmlex() { 847 return TheLexer->LexToken(); 848} 849const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } 850unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } 851std::string LLLgetFilename() { return TheLexer->getFilename(); } 852unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } 853 854void FreeLexer() { 855 delete TheLexer; 856 TheLexer = 0; 857} 858