LLLexer.cpp revision d6ffcf91172d89afe3fea6f3f3cc773c5aed4406
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "ParserInternals.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Support/MathExtras.h" 18 19#include <list> 20#include "llvmAsmParser.h" 21using namespace llvm; 22 23//===----------------------------------------------------------------------===// 24// Helper functions. 25//===----------------------------------------------------------------------===// 26 27// atoull - Convert an ascii string of decimal digits into the unsigned long 28// long representation... this does not have to do input error checking, 29// because we know that the input will be matched by a suitable regex... 30// 31static uint64_t atoull(const char *Buffer, const char *End) { 32 uint64_t Result = 0; 33 for (; Buffer != End; Buffer++) { 34 uint64_t OldRes = Result; 35 Result *= 10; 36 Result += *Buffer-'0'; 37 if (Result < OldRes) { // Uh, oh, overflow detected!!! 38 GenerateError("constant bigger than 64 bits detected!"); 39 return 0; 40 } 41 } 42 return Result; 43} 44 45static uint64_t HexIntToVal(const char *Buffer, const char *End) { 46 uint64_t Result = 0; 47 for (; Buffer != End; ++Buffer) { 48 uint64_t OldRes = Result; 49 Result *= 16; 50 char C = *Buffer; 51 if (C >= '0' && C <= '9') 52 Result += C-'0'; 53 else if (C >= 'A' && C <= 'F') 54 Result += C-'A'+10; 55 else if (C >= 'a' && C <= 'f') 56 Result += C-'a'+10; 57 58 if (Result < OldRes) { // Uh, oh, overflow detected!!! 59 GenerateError("constant bigger than 64 bits detected!"); 60 return 0; 61 } 62 } 63 return Result; 64} 65 66// HexToFP - Convert the ascii string in hexadecimal format to the floating 67// point representation of it. 68// 69static double HexToFP(const char *Buffer, const char *End) { 70 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double 71} 72 73static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ 74 Pair[0] = 0; 75 for (int i=0; i<16; i++, Buffer++) { 76 assert(Buffer != End); 77 Pair[0] *= 16; 78 char C = *Buffer; 79 if (C >= '0' && C <= '9') 80 Pair[0] += C-'0'; 81 else if (C >= 'A' && C <= 'F') 82 Pair[0] += C-'A'+10; 83 else if (C >= 'a' && C <= 'f') 84 Pair[0] += C-'a'+10; 85 } 86 Pair[1] = 0; 87 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 88 Pair[1] *= 16; 89 char C = *Buffer; 90 if (C >= '0' && C <= '9') 91 Pair[1] += C-'0'; 92 else if (C >= 'A' && C <= 'F') 93 Pair[1] += C-'A'+10; 94 else if (C >= 'a' && C <= 'f') 95 Pair[1] += C-'a'+10; 96 } 97 if (Buffer != End) 98 GenerateError("constant bigger than 128 bits detected!"); 99} 100 101// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 102// appropriate character. 103static void UnEscapeLexed(std::string &Str) { 104 if (Str.empty()) return; 105 106 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 107 char *BOut = Buffer; 108 for (char *BIn = Buffer; BIn != EndBuffer; ) { 109 if (BIn[0] == '\\') { 110 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 111 *BOut++ = '\\'; // Two \ becomes one 112 BIn += 2; 113 } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { 114 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string 115 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number 116 BIn[3] = Tmp; // Restore character 117 BIn += 3; // Skip over handled chars 118 ++BOut; 119 } else { 120 *BOut++ = *BIn++; 121 } 122 } else { 123 *BOut++ = *BIn++; 124 } 125 } 126 Str.resize(BOut-Buffer); 127} 128 129/// isLabelChar - Return true for [-a-zA-Z$._0-9]. 130static bool isLabelChar(char C) { 131 return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; 132} 133 134 135/// isLabelTail - Return true if this pointer points to a valid end of a label. 136static const char *isLabelTail(const char *CurPtr) { 137 while (1) { 138 if (CurPtr[0] == ':') return CurPtr+1; 139 if (!isLabelChar(CurPtr[0])) return 0; 140 ++CurPtr; 141 } 142} 143 144 145 146//===----------------------------------------------------------------------===// 147// Lexer definition. 148//===----------------------------------------------------------------------===// 149 150// FIXME: REMOVE THIS. 151#define YYEOF 0 152#define YYERROR -2 153 154LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { 155 CurPtr = CurBuf->getBufferStart(); 156} 157 158std::string LLLexer::getFilename() const { 159 return CurBuf->getBufferIdentifier(); 160} 161 162int LLLexer::getNextChar() { 163 char CurChar = *CurPtr++; 164 switch (CurChar) { 165 default: return (unsigned char)CurChar; 166 case 0: 167 // A nul character in the stream is either the end of the current buffer or 168 // a random nul in the file. Disambiguate that here. 169 if (CurPtr-1 != CurBuf->getBufferEnd()) 170 return 0; // Just whitespace. 171 172 // Otherwise, return end of file. 173 --CurPtr; // Another call to lex will return EOF again. 174 return EOF; 175 case '\n': 176 case '\r': 177 // Handle the newline character by ignoring it and incrementing the line 178 // count. However, be careful about 'dos style' files with \n\r in them. 179 // Only treat a \n\r or \r\n as a single line. 180 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 181 *CurPtr != CurChar) 182 ++CurPtr; // Eat the two char newline sequence. 183 184 ++CurLineNo; 185 return '\n'; 186 } 187} 188 189 190int LLLexer::LexToken() { 191 TokStart = CurPtr; 192 193 int CurChar = getNextChar(); 194 195 switch (CurChar) { 196 default: 197 // Handle letters: [a-zA-Z_] 198 if (isalpha(CurChar) || CurChar == '_') 199 return LexIdentifier(); 200 201 return CurChar; 202 case EOF: return YYEOF; 203 case 0: 204 case ' ': 205 case '\t': 206 case '\n': 207 case '\r': 208 // Ignore whitespace. 209 return LexToken(); 210 case '+': return LexPositive(); 211 case '@': return LexAt(); 212 case '%': return LexPercent(); 213 case '"': return LexQuote(); 214 case '.': 215 if (const char *Ptr = isLabelTail(CurPtr)) { 216 CurPtr = Ptr; 217 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 218 return LABELSTR; 219 } 220 if (CurPtr[0] == '.' && CurPtr[1] == '.') { 221 CurPtr += 2; 222 return DOTDOTDOT; 223 } 224 return '.'; 225 case '$': 226 if (const char *Ptr = isLabelTail(CurPtr)) { 227 CurPtr = Ptr; 228 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 229 return LABELSTR; 230 } 231 return '$'; 232 case ';': 233 SkipLineComment(); 234 return LexToken(); 235 case '0': case '1': case '2': case '3': case '4': 236 case '5': case '6': case '7': case '8': case '9': 237 case '-': 238 return LexDigitOrNegative(); 239 } 240} 241 242void LLLexer::SkipLineComment() { 243 while (1) { 244 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 245 return; 246 } 247} 248 249/// LexAt - Lex all tokens that start with an @ character: 250/// AtStringConstant @\"[^\"]*\" 251/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* 252/// GlobalVarID @[0-9]+ 253int LLLexer::LexAt() { 254 // Handle AtStringConstant: @\"[^\"]*\" 255 if (CurPtr[0] == '"') { 256 ++CurPtr; 257 258 while (1) { 259 int CurChar = getNextChar(); 260 261 if (CurChar == EOF) { 262 GenerateError("End of file in global variable name"); 263 return YYERROR; 264 } 265 if (CurChar == '"') { 266 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 267 UnEscapeLexed(*llvmAsmlval.StrVal); 268 return ATSTRINGCONSTANT; 269 } 270 } 271 } 272 273 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 274 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 275 CurPtr[0] == '.' || CurPtr[0] == '_') { 276 ++CurPtr; 277 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 278 CurPtr[0] == '.' || CurPtr[0] == '_') 279 ++CurPtr; 280 281 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ 282 return GLOBALVAR; 283 } 284 285 // Handle GlobalVarID: @[0-9]+ 286 if (isdigit(CurPtr[0])) { 287 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 288 /*empty*/; 289 290 uint64_t Val = atoull(TokStart+1, CurPtr); 291 if ((unsigned)Val != Val) 292 GenerateError("Invalid value number (too large)!"); 293 llvmAsmlval.UIntVal = unsigned(Val); 294 return GLOBALVAL_ID; 295 } 296 297 return '@'; 298} 299 300 301/// LexPercent - Lex all tokens that start with a % character: 302/// PctStringConstant %\"[^\"]*\" 303/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* 304/// LocalVarID %[0-9]+ 305int LLLexer::LexPercent() { 306 // Handle PctStringConstant: %\"[^\"]*\" 307 if (CurPtr[0] == '"') { 308 ++CurPtr; 309 310 while (1) { 311 int CurChar = getNextChar(); 312 313 if (CurChar == EOF) { 314 GenerateError("End of file in local variable name"); 315 return YYERROR; 316 } 317 if (CurChar == '"') { 318 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 319 UnEscapeLexed(*llvmAsmlval.StrVal); 320 return PCTSTRINGCONSTANT; 321 } 322 } 323 } 324 325 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 326 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 327 CurPtr[0] == '.' || CurPtr[0] == '_') { 328 ++CurPtr; 329 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 330 CurPtr[0] == '.' || CurPtr[0] == '_') 331 ++CurPtr; 332 333 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % 334 return LOCALVAR; 335 } 336 337 // Handle LocalVarID: %[0-9]+ 338 if (isdigit(CurPtr[0])) { 339 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 340 /*empty*/; 341 342 uint64_t Val = atoull(TokStart+1, CurPtr); 343 if ((unsigned)Val != Val) 344 GenerateError("Invalid value number (too large)!"); 345 llvmAsmlval.UIntVal = unsigned(Val); 346 return LOCALVAL_ID; 347 } 348 349 return '%'; 350} 351 352/// LexQuote - Lex all tokens that start with a " character: 353/// QuoteLabel "[^"]+": 354/// StringConstant "[^"]*" 355int LLLexer::LexQuote() { 356 while (1) { 357 int CurChar = getNextChar(); 358 359 if (CurChar == EOF) { 360 GenerateError("End of file in quoted string"); 361 return YYERROR; 362 } 363 364 if (CurChar != '"') continue; 365 366 if (CurPtr[0] != ':') { 367 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); 368 UnEscapeLexed(*llvmAsmlval.StrVal); 369 return STRINGCONSTANT; 370 } 371 372 ++CurPtr; 373 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); 374 UnEscapeLexed(*llvmAsmlval.StrVal); 375 return LABELSTR; 376 } 377} 378 379static bool JustWhitespaceNewLine(const char *&Ptr) { 380 const char *ThisPtr = Ptr; 381 while (*ThisPtr == ' ' || *ThisPtr == '\t') 382 ++ThisPtr; 383 if (*ThisPtr == '\n' || *ThisPtr == '\r') { 384 Ptr = ThisPtr; 385 return true; 386 } 387 return false; 388} 389 390 391/// LexIdentifier: Handle several related productions: 392/// Label [-a-zA-Z$._0-9]+: 393/// IntegerType i[0-9]+ 394/// Keyword sdiv, float, ... 395/// HexIntConstant [us]0x[0-9A-Fa-f]+ 396int LLLexer::LexIdentifier() { 397 const char *StartChar = CurPtr; 398 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 399 const char *KeywordEnd = 0; 400 401 for (; isLabelChar(*CurPtr); ++CurPtr) { 402 // If we decide this is an integer, remember the end of the sequence. 403 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; 404 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; 405 } 406 407 // If we stopped due to a colon, this really is a label. 408 if (*CurPtr == ':') { 409 llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); 410 return LABELSTR; 411 } 412 413 // Otherwise, this wasn't a label. If this was valid as an integer type, 414 // return it. 415 if (IntEnd == 0) IntEnd = CurPtr; 416 if (IntEnd != StartChar) { 417 CurPtr = IntEnd; 418 uint64_t NumBits = atoull(StartChar, CurPtr); 419 if (NumBits < IntegerType::MIN_INT_BITS || 420 NumBits > IntegerType::MAX_INT_BITS) { 421 GenerateError("Bitwidth for integer type out of range!"); 422 return YYERROR; 423 } 424 const Type* Ty = IntegerType::get(NumBits); 425 llvmAsmlval.PrimType = Ty; 426 return INTTYPE; 427 } 428 429 // Otherwise, this was a letter sequence. See which keyword this is. 430 if (KeywordEnd == 0) KeywordEnd = CurPtr; 431 CurPtr = KeywordEnd; 432 --StartChar; 433 unsigned Len = CurPtr-StartChar; 434#define KEYWORD(STR, TOK) \ 435 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; 436 437 KEYWORD("begin", BEGINTOK); 438 KEYWORD("end", ENDTOK); 439 KEYWORD("true", TRUETOK); 440 KEYWORD("false", FALSETOK); 441 KEYWORD("declare", DECLARE); 442 KEYWORD("define", DEFINE); 443 KEYWORD("global", GLOBAL); 444 KEYWORD("constant", CONSTANT); 445 446 KEYWORD("internal", INTERNAL); 447 KEYWORD("linkonce", LINKONCE); 448 KEYWORD("weak", WEAK); 449 KEYWORD("appending", APPENDING); 450 KEYWORD("dllimport", DLLIMPORT); 451 KEYWORD("dllexport", DLLEXPORT); 452 KEYWORD("hidden", HIDDEN); 453 KEYWORD("protected", PROTECTED); 454 KEYWORD("extern_weak", EXTERN_WEAK); 455 KEYWORD("external", EXTERNAL); 456 KEYWORD("thread_local", THREAD_LOCAL); 457 KEYWORD("zeroinitializer", ZEROINITIALIZER); 458 KEYWORD("undef", UNDEF); 459 KEYWORD("null", NULL_TOK); 460 KEYWORD("to", TO); 461 KEYWORD("tail", TAIL); 462 KEYWORD("target", TARGET); 463 KEYWORD("triple", TRIPLE); 464 KEYWORD("deplibs", DEPLIBS); 465 KEYWORD("datalayout", DATALAYOUT); 466 KEYWORD("volatile", VOLATILE); 467 KEYWORD("align", ALIGN); 468 KEYWORD("addrspace", ADDRSPACE); 469 KEYWORD("section", SECTION); 470 KEYWORD("alias", ALIAS); 471 KEYWORD("module", MODULE); 472 KEYWORD("asm", ASM_TOK); 473 KEYWORD("sideeffect", SIDEEFFECT); 474 KEYWORD("gc", GC); 475 476 KEYWORD("cc", CC_TOK); 477 KEYWORD("ccc", CCC_TOK); 478 KEYWORD("fastcc", FASTCC_TOK); 479 KEYWORD("coldcc", COLDCC_TOK); 480 KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); 481 KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); 482 483 KEYWORD("signext", SIGNEXT); 484 KEYWORD("zeroext", ZEROEXT); 485 KEYWORD("inreg", INREG); 486 KEYWORD("sret", SRET); 487 KEYWORD("nounwind", NOUNWIND); 488 KEYWORD("noreturn", NORETURN); 489 KEYWORD("noalias", NOALIAS); 490 KEYWORD("byval", BYVAL); 491 KEYWORD("nest", NEST); 492 KEYWORD("readnone", READNONE); 493 KEYWORD("readonly", READONLY); 494 495 KEYWORD("type", TYPE); 496 KEYWORD("opaque", OPAQUE); 497 498 KEYWORD("eq" , EQ); 499 KEYWORD("ne" , NE); 500 KEYWORD("slt", SLT); 501 KEYWORD("sgt", SGT); 502 KEYWORD("sle", SLE); 503 KEYWORD("sge", SGE); 504 KEYWORD("ult", ULT); 505 KEYWORD("ugt", UGT); 506 KEYWORD("ule", ULE); 507 KEYWORD("uge", UGE); 508 KEYWORD("oeq", OEQ); 509 KEYWORD("one", ONE); 510 KEYWORD("olt", OLT); 511 KEYWORD("ogt", OGT); 512 KEYWORD("ole", OLE); 513 KEYWORD("oge", OGE); 514 KEYWORD("ord", ORD); 515 KEYWORD("uno", UNO); 516 KEYWORD("ueq", UEQ); 517 KEYWORD("une", UNE); 518#undef KEYWORD 519 520 // Keywords for types. 521#define TYPEKEYWORD(STR, LLVMTY, TOK) \ 522 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 523 llvmAsmlval.PrimType = LLVMTY; return TOK; } 524 TYPEKEYWORD("void", Type::VoidTy, VOID); 525 TYPEKEYWORD("float", Type::FloatTy, FLOAT); 526 TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); 527 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); 528 TYPEKEYWORD("fp128", Type::FP128Ty, FP128); 529 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); 530 TYPEKEYWORD("label", Type::LabelTy, LABEL); 531#undef TYPEKEYWORD 532 533 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is 534 // to avoid conflicting with the sext/zext instructions, below. 535 if (Len == 4 && !memcmp(StartChar, "sext", 4)) { 536 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 537 if (JustWhitespaceNewLine(CurPtr)) 538 return SIGNEXT; 539 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { 540 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 541 if (JustWhitespaceNewLine(CurPtr)) 542 return ZEROEXT; 543 } 544 545 // Keywords for instructions. 546#define INSTKEYWORD(STR, type, Enum, TOK) \ 547 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 548 llvmAsmlval.type = Instruction::Enum; return TOK; } 549 550 INSTKEYWORD("add", BinaryOpVal, Add, ADD); 551 INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); 552 INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); 553 INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); 554 INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); 555 INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); 556 INSTKEYWORD("urem", BinaryOpVal, URem, UREM); 557 INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); 558 INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); 559 INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); 560 INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); 561 INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); 562 INSTKEYWORD("and", BinaryOpVal, And, AND); 563 INSTKEYWORD("or", BinaryOpVal, Or , OR ); 564 INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); 565 INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); 566 INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); 567 568 INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); 569 INSTKEYWORD("call", OtherOpVal, Call, CALL); 570 INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); 571 INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); 572 INSTKEYWORD("sext", CastOpVal, SExt, SEXT); 573 INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); 574 INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); 575 INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); 576 INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); 577 INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); 578 INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); 579 INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); 580 INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); 581 INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); 582 INSTKEYWORD("select", OtherOpVal, Select, SELECT); 583 INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); 584 INSTKEYWORD("ret", TermOpVal, Ret, RET); 585 INSTKEYWORD("br", TermOpVal, Br, BR); 586 INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); 587 INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); 588 INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); 589 INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); 590 591 INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); 592 INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); 593 INSTKEYWORD("free", MemOpVal, Free, FREE); 594 INSTKEYWORD("load", MemOpVal, Load, LOAD); 595 INSTKEYWORD("store", MemOpVal, Store, STORE); 596 INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); 597 598 INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); 599 INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); 600 INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); 601 INSTKEYWORD("getresult", OtherOpVal, GetResult, GETRESULT); 602#undef INSTKEYWORD 603 604 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 605 // the CFE to avoid forcing it to deal with 64-bit numbers. 606 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 607 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { 608 int len = CurPtr-TokStart-3; 609 uint32_t bits = len * 4; 610 APInt Tmp(bits, TokStart+3, len, 16); 611 uint32_t activeBits = Tmp.getActiveBits(); 612 if (activeBits > 0 && activeBits < bits) 613 Tmp.trunc(activeBits); 614 if (Tmp.getBitWidth() > 64) { 615 llvmAsmlval.APIntVal = new APInt(Tmp); 616 return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; 617 } else if (TokStart[0] == 's') { 618 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 619 return ESINT64VAL; 620 } else { 621 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 622 return EUINT64VAL; 623 } 624 } 625 626 // If this is "cc1234", return this as just "cc". 627 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 628 CurPtr = TokStart+2; 629 return CC_TOK; 630 } 631 632 // If this starts with "call", return it as CALL. This is to support old 633 // broken .ll files. FIXME: remove this with LLVM 3.0. 634 if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { 635 CurPtr = TokStart+4; 636 llvmAsmlval.OtherOpVal = Instruction::Call; 637 return CALL; 638 } 639 640 // Finally, if this isn't known, return just a single character. 641 CurPtr = TokStart+1; 642 return TokStart[0]; 643} 644 645 646/// Lex0x: Handle productions that start with 0x, knowing that it matches and 647/// that this is not a label: 648/// HexFPConstant 0x[0-9A-Fa-f]+ 649/// HexFP80Constant 0xK[0-9A-Fa-f]+ 650/// HexFP128Constant 0xL[0-9A-Fa-f]+ 651/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 652int LLLexer::Lex0x() { 653 CurPtr = TokStart + 2; 654 655 char Kind; 656 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { 657 Kind = *CurPtr++; 658 } else { 659 Kind = 'J'; 660 } 661 662 if (!isxdigit(CurPtr[0])) { 663 // Bad token, return it as just zero. 664 CurPtr = TokStart+1; 665 return '0'; 666 } 667 668 while (isxdigit(CurPtr[0])) 669 ++CurPtr; 670 671 if (Kind == 'J') { 672 // HexFPConstant - Floating point constant represented in IEEE format as a 673 // hexadecimal number for when exponential notation is not precise enough. 674 // Float and double only. 675 llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); 676 return FPVAL; 677 } 678 679 uint64_t Pair[2]; 680 HexToIntPair(TokStart+3, CurPtr, Pair); 681 switch (Kind) { 682 default: assert(0 && "Unknown kind!"); 683 case 'K': 684 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 685 llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); 686 return FPVAL; 687 case 'L': 688 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 689 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); 690 return FPVAL; 691 case 'M': 692 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 693 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); 694 return FPVAL; 695 } 696} 697 698/// LexIdentifier: Handle several related productions: 699/// Label [-a-zA-Z$._0-9]+: 700/// NInteger -[0-9]+ 701/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 702/// PInteger [0-9]+ 703/// HexFPConstant 0x[0-9A-Fa-f]+ 704/// HexFP80Constant 0xK[0-9A-Fa-f]+ 705/// HexFP128Constant 0xL[0-9A-Fa-f]+ 706/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 707int LLLexer::LexDigitOrNegative() { 708 // If the letter after the negative is a number, this is probably a label. 709 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { 710 // Okay, this is not a number after the -, it's probably a label. 711 if (const char *End = isLabelTail(CurPtr)) { 712 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 713 CurPtr = End; 714 return LABELSTR; 715 } 716 717 return CurPtr[-1]; 718 } 719 720 // At this point, it is either a label, int or fp constant. 721 722 // Skip digits, we have at least one. 723 for (; isdigit(CurPtr[0]); ++CurPtr) 724 /*empty*/; 725 726 // Check to see if this really is a label afterall, e.g. "-1:". 727 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 728 if (const char *End = isLabelTail(CurPtr)) { 729 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 730 CurPtr = End; 731 return LABELSTR; 732 } 733 } 734 735 // If the next character is a '.', then it is a fp value, otherwise its 736 // integer. 737 if (CurPtr[0] != '.') { 738 if (TokStart[0] == '0' && TokStart[1] == 'x') 739 return Lex0x(); 740 unsigned Len = CurPtr-TokStart; 741 uint32_t numBits = ((Len * 64) / 19) + 2; 742 APInt Tmp(numBits, TokStart, Len, 10); 743 if (TokStart[0] == '-') { 744 uint32_t minBits = Tmp.getMinSignedBits(); 745 if (minBits > 0 && minBits < numBits) 746 Tmp.trunc(minBits); 747 if (Tmp.getBitWidth() > 64) { 748 llvmAsmlval.APIntVal = new APInt(Tmp); 749 return ESAPINTVAL; 750 } else { 751 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 752 return ESINT64VAL; 753 } 754 } else { 755 uint32_t activeBits = Tmp.getActiveBits(); 756 if (activeBits > 0 && activeBits < numBits) 757 Tmp.trunc(activeBits); 758 if (Tmp.getBitWidth() > 64) { 759 llvmAsmlval.APIntVal = new APInt(Tmp); 760 return EUAPINTVAL; 761 } else { 762 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 763 return EUINT64VAL; 764 } 765 } 766 } 767 768 ++CurPtr; 769 770 // Skip over [0-9]*([eE][-+]?[0-9]+)? 771 while (isdigit(CurPtr[0])) ++CurPtr; 772 773 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 774 if (isdigit(CurPtr[1]) || 775 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 776 CurPtr += 2; 777 while (isdigit(CurPtr[0])) ++CurPtr; 778 } 779 } 780 781 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 782 return FPVAL; 783} 784 785/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 786int LLLexer::LexPositive() { 787 // If the letter after the negative is a number, this is probably not a 788 // label. 789 if (!isdigit(CurPtr[0])) 790 return CurPtr[-1]; 791 792 // Skip digits. 793 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 794 /*empty*/; 795 796 // At this point, we need a '.'. 797 if (CurPtr[0] != '.') { 798 CurPtr = TokStart+1; 799 return TokStart[0]; 800 } 801 802 ++CurPtr; 803 804 // Skip over [0-9]*([eE][-+]?[0-9]+)? 805 while (isdigit(CurPtr[0])) ++CurPtr; 806 807 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 808 if (isdigit(CurPtr[1]) || 809 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 810 CurPtr += 2; 811 while (isdigit(CurPtr[0])) ++CurPtr; 812 } 813 } 814 815 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 816 return FPVAL; 817} 818 819 820//===----------------------------------------------------------------------===// 821// Define the interface to this file. 822//===----------------------------------------------------------------------===// 823 824static LLLexer *TheLexer; 825 826void InitLLLexer(llvm::MemoryBuffer *MB) { 827 assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); 828 TheLexer = new LLLexer(MB); 829} 830 831int llvmAsmlex() { 832 return TheLexer->LexToken(); 833} 834const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } 835unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } 836std::string LLLgetFilename() { return TheLexer->getFilename(); } 837unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } 838 839void FreeLexer() { 840 delete TheLexer; 841 TheLexer = 0; 842} 843