// LLLexer.cpp revision 280a6e607d8eb7401749a92db624a82de47da777
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "ParserInternals.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Support/MathExtras.h" 18 19#include <list> 20#include "llvmAsmParser.h" 21 22#include <cstring> 23using namespace llvm; 24 25//===----------------------------------------------------------------------===// 26// Helper functions. 27//===----------------------------------------------------------------------===// 28 29// atoull - Convert an ascii string of decimal digits into the unsigned long 30// long representation... this does not have to do input error checking, 31// because we know that the input will be matched by a suitable regex... 32// 33static uint64_t atoull(const char *Buffer, const char *End) { 34 uint64_t Result = 0; 35 for (; Buffer != End; Buffer++) { 36 uint64_t OldRes = Result; 37 Result *= 10; 38 Result += *Buffer-'0'; 39 if (Result < OldRes) { // Uh, oh, overflow detected!!! 40 GenerateError("constant bigger than 64 bits detected!"); 41 return 0; 42 } 43 } 44 return Result; 45} 46 47static uint64_t HexIntToVal(const char *Buffer, const char *End) { 48 uint64_t Result = 0; 49 for (; Buffer != End; ++Buffer) { 50 uint64_t OldRes = Result; 51 Result *= 16; 52 char C = *Buffer; 53 if (C >= '0' && C <= '9') 54 Result += C-'0'; 55 else if (C >= 'A' && C <= 'F') 56 Result += C-'A'+10; 57 else if (C >= 'a' && C <= 'f') 58 Result += C-'a'+10; 59 60 if (Result < OldRes) { // Uh, oh, overflow detected!!! 
61 GenerateError("constant bigger than 64 bits detected!"); 62 return 0; 63 } 64 } 65 return Result; 66} 67 68// HexToFP - Convert the ascii string in hexadecimal format to the floating 69// point representation of it. 70// 71static double HexToFP(const char *Buffer, const char *End) { 72 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double 73} 74 75static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ 76 Pair[0] = 0; 77 for (int i=0; i<16; i++, Buffer++) { 78 assert(Buffer != End); 79 Pair[0] *= 16; 80 char C = *Buffer; 81 if (C >= '0' && C <= '9') 82 Pair[0] += C-'0'; 83 else if (C >= 'A' && C <= 'F') 84 Pair[0] += C-'A'+10; 85 else if (C >= 'a' && C <= 'f') 86 Pair[0] += C-'a'+10; 87 } 88 Pair[1] = 0; 89 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 90 Pair[1] *= 16; 91 char C = *Buffer; 92 if (C >= '0' && C <= '9') 93 Pair[1] += C-'0'; 94 else if (C >= 'A' && C <= 'F') 95 Pair[1] += C-'A'+10; 96 else if (C >= 'a' && C <= 'f') 97 Pair[1] += C-'a'+10; 98 } 99 if (Buffer != End) 100 GenerateError("constant bigger than 128 bits detected!"); 101} 102 103// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 104// appropriate character. 
105static void UnEscapeLexed(std::string &Str) { 106 if (Str.empty()) return; 107 108 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 109 char *BOut = Buffer; 110 for (char *BIn = Buffer; BIn != EndBuffer; ) { 111 if (BIn[0] == '\\') { 112 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 113 *BOut++ = '\\'; // Two \ becomes one 114 BIn += 2; 115 } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { 116 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string 117 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number 118 BIn[3] = Tmp; // Restore character 119 BIn += 3; // Skip over handled chars 120 ++BOut; 121 } else { 122 *BOut++ = *BIn++; 123 } 124 } else { 125 *BOut++ = *BIn++; 126 } 127 } 128 Str.resize(BOut-Buffer); 129} 130 131/// isLabelChar - Return true for [-a-zA-Z$._0-9]. 132static bool isLabelChar(char C) { 133 return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; 134} 135 136 137/// isLabelTail - Return true if this pointer points to a valid end of a label. 138static const char *isLabelTail(const char *CurPtr) { 139 while (1) { 140 if (CurPtr[0] == ':') return CurPtr+1; 141 if (!isLabelChar(CurPtr[0])) return 0; 142 ++CurPtr; 143 } 144} 145 146 147 148//===----------------------------------------------------------------------===// 149// Lexer definition. 150//===----------------------------------------------------------------------===// 151 152// FIXME: REMOVE THIS. 153#define YYEOF 0 154#define YYERROR -2 155 156LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { 157 CurPtr = CurBuf->getBufferStart(); 158} 159 160std::string LLLexer::getFilename() const { 161 return CurBuf->getBufferIdentifier(); 162} 163 164int LLLexer::getNextChar() { 165 char CurChar = *CurPtr++; 166 switch (CurChar) { 167 default: return (unsigned char)CurChar; 168 case 0: 169 // A nul character in the stream is either the end of the current buffer or 170 // a random nul in the file. Disambiguate that here. 
171 if (CurPtr-1 != CurBuf->getBufferEnd()) 172 return 0; // Just whitespace. 173 174 // Otherwise, return end of file. 175 --CurPtr; // Another call to lex will return EOF again. 176 return EOF; 177 case '\n': 178 case '\r': 179 // Handle the newline character by ignoring it and incrementing the line 180 // count. However, be careful about 'dos style' files with \n\r in them. 181 // Only treat a \n\r or \r\n as a single line. 182 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 183 *CurPtr != CurChar) 184 ++CurPtr; // Eat the two char newline sequence. 185 186 ++CurLineNo; 187 return '\n'; 188 } 189} 190 191 192int LLLexer::LexToken() { 193 TokStart = CurPtr; 194 195 int CurChar = getNextChar(); 196 197 switch (CurChar) { 198 default: 199 // Handle letters: [a-zA-Z_] 200 if (isalpha(CurChar) || CurChar == '_') 201 return LexIdentifier(); 202 203 return CurChar; 204 case EOF: return YYEOF; 205 case 0: 206 case ' ': 207 case '\t': 208 case '\n': 209 case '\r': 210 // Ignore whitespace. 211 return LexToken(); 212 case '+': return LexPositive(); 213 case '@': return LexAt(); 214 case '%': return LexPercent(); 215 case '"': return LexQuote(); 216 case '.': 217 if (const char *Ptr = isLabelTail(CurPtr)) { 218 CurPtr = Ptr; 219 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 220 return LABELSTR; 221 } 222 if (CurPtr[0] == '.' 
&& CurPtr[1] == '.') { 223 CurPtr += 2; 224 return DOTDOTDOT; 225 } 226 return '.'; 227 case '$': 228 if (const char *Ptr = isLabelTail(CurPtr)) { 229 CurPtr = Ptr; 230 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 231 return LABELSTR; 232 } 233 return '$'; 234 case ';': 235 SkipLineComment(); 236 return LexToken(); 237 case '0': case '1': case '2': case '3': case '4': 238 case '5': case '6': case '7': case '8': case '9': 239 case '-': 240 return LexDigitOrNegative(); 241 } 242} 243 244void LLLexer::SkipLineComment() { 245 while (1) { 246 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 247 return; 248 } 249} 250 251/// LexAt - Lex all tokens that start with an @ character: 252/// AtStringConstant @\"[^\"]*\" 253/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* 254/// GlobalVarID @[0-9]+ 255int LLLexer::LexAt() { 256 // Handle AtStringConstant: @\"[^\"]*\" 257 if (CurPtr[0] == '"') { 258 ++CurPtr; 259 260 while (1) { 261 int CurChar = getNextChar(); 262 263 if (CurChar == EOF) { 264 GenerateError("End of file in global variable name"); 265 return YYERROR; 266 } 267 if (CurChar == '"') { 268 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 269 UnEscapeLexed(*llvmAsmlval.StrVal); 270 return ATSTRINGCONSTANT; 271 } 272 } 273 } 274 275 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 276 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 277 CurPtr[0] == '.' || CurPtr[0] == '_') { 278 ++CurPtr; 279 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 280 CurPtr[0] == '.' 
|| CurPtr[0] == '_') 281 ++CurPtr; 282 283 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ 284 return GLOBALVAR; 285 } 286 287 // Handle GlobalVarID: @[0-9]+ 288 if (isdigit(CurPtr[0])) { 289 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 290 /*empty*/; 291 292 uint64_t Val = atoull(TokStart+1, CurPtr); 293 if ((unsigned)Val != Val) 294 GenerateError("Invalid value number (too large)!"); 295 llvmAsmlval.UIntVal = unsigned(Val); 296 return GLOBALVAL_ID; 297 } 298 299 return '@'; 300} 301 302 303/// LexPercent - Lex all tokens that start with a % character: 304/// PctStringConstant %\"[^\"]*\" 305/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* 306/// LocalVarID %[0-9]+ 307int LLLexer::LexPercent() { 308 // Handle PctStringConstant: %\"[^\"]*\" 309 if (CurPtr[0] == '"') { 310 ++CurPtr; 311 312 while (1) { 313 int CurChar = getNextChar(); 314 315 if (CurChar == EOF) { 316 GenerateError("End of file in local variable name"); 317 return YYERROR; 318 } 319 if (CurChar == '"') { 320 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 321 UnEscapeLexed(*llvmAsmlval.StrVal); 322 return PCTSTRINGCONSTANT; 323 } 324 } 325 } 326 327 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 328 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 329 CurPtr[0] == '.' || CurPtr[0] == '_') { 330 ++CurPtr; 331 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 332 CurPtr[0] == '.' 
|| CurPtr[0] == '_') 333 ++CurPtr; 334 335 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % 336 return LOCALVAR; 337 } 338 339 // Handle LocalVarID: %[0-9]+ 340 if (isdigit(CurPtr[0])) { 341 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 342 /*empty*/; 343 344 uint64_t Val = atoull(TokStart+1, CurPtr); 345 if ((unsigned)Val != Val) 346 GenerateError("Invalid value number (too large)!"); 347 llvmAsmlval.UIntVal = unsigned(Val); 348 return LOCALVAL_ID; 349 } 350 351 return '%'; 352} 353 354/// LexQuote - Lex all tokens that start with a " character: 355/// QuoteLabel "[^"]+": 356/// StringConstant "[^"]*" 357int LLLexer::LexQuote() { 358 while (1) { 359 int CurChar = getNextChar(); 360 361 if (CurChar == EOF) { 362 GenerateError("End of file in quoted string"); 363 return YYERROR; 364 } 365 366 if (CurChar != '"') continue; 367 368 if (CurPtr[0] != ':') { 369 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); 370 UnEscapeLexed(*llvmAsmlval.StrVal); 371 return STRINGCONSTANT; 372 } 373 374 ++CurPtr; 375 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); 376 UnEscapeLexed(*llvmAsmlval.StrVal); 377 return LABELSTR; 378 } 379} 380 381static bool JustWhitespaceNewLine(const char *&Ptr) { 382 const char *ThisPtr = Ptr; 383 while (*ThisPtr == ' ' || *ThisPtr == '\t') 384 ++ThisPtr; 385 if (*ThisPtr == '\n' || *ThisPtr == '\r') { 386 Ptr = ThisPtr; 387 return true; 388 } 389 return false; 390} 391 392 393/// LexIdentifier: Handle several related productions: 394/// Label [-a-zA-Z$._0-9]+: 395/// IntegerType i[0-9]+ 396/// Keyword sdiv, float, ... 397/// HexIntConstant [us]0x[0-9A-Fa-f]+ 398int LLLexer::LexIdentifier() { 399 const char *StartChar = CurPtr; 400 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 401 const char *KeywordEnd = 0; 402 403 for (; isLabelChar(*CurPtr); ++CurPtr) { 404 // If we decide this is an integer, remember the end of the sequence. 
405 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; 406 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; 407 } 408 409 // If we stopped due to a colon, this really is a label. 410 if (*CurPtr == ':') { 411 llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); 412 return LABELSTR; 413 } 414 415 // Otherwise, this wasn't a label. If this was valid as an integer type, 416 // return it. 417 if (IntEnd == 0) IntEnd = CurPtr; 418 if (IntEnd != StartChar) { 419 CurPtr = IntEnd; 420 uint64_t NumBits = atoull(StartChar, CurPtr); 421 if (NumBits < IntegerType::MIN_INT_BITS || 422 NumBits > IntegerType::MAX_INT_BITS) { 423 GenerateError("Bitwidth for integer type out of range!"); 424 return YYERROR; 425 } 426 const Type* Ty = IntegerType::get(NumBits); 427 llvmAsmlval.PrimType = Ty; 428 return INTTYPE; 429 } 430 431 // Otherwise, this was a letter sequence. See which keyword this is. 432 if (KeywordEnd == 0) KeywordEnd = CurPtr; 433 CurPtr = KeywordEnd; 434 --StartChar; 435 unsigned Len = CurPtr-StartChar; 436#define KEYWORD(STR, TOK) \ 437 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; 438 439 KEYWORD("begin", BEGINTOK); 440 KEYWORD("end", ENDTOK); 441 KEYWORD("true", TRUETOK); 442 KEYWORD("false", FALSETOK); 443 KEYWORD("declare", DECLARE); 444 KEYWORD("define", DEFINE); 445 KEYWORD("global", GLOBAL); 446 KEYWORD("constant", CONSTANT); 447 448 KEYWORD("internal", INTERNAL); 449 KEYWORD("linkonce", LINKONCE); 450 KEYWORD("weak", WEAK); 451 KEYWORD("appending", APPENDING); 452 KEYWORD("dllimport", DLLIMPORT); 453 KEYWORD("dllexport", DLLEXPORT); 454 KEYWORD("hidden", HIDDEN); 455 KEYWORD("protected", PROTECTED); 456 KEYWORD("extern_weak", EXTERN_WEAK); 457 KEYWORD("external", EXTERNAL); 458 KEYWORD("thread_local", THREAD_LOCAL); 459 KEYWORD("zeroinitializer", ZEROINITIALIZER); 460 KEYWORD("undef", UNDEF); 461 KEYWORD("null", NULL_TOK); 462 KEYWORD("to", TO); 463 KEYWORD("tail", TAIL); 464 KEYWORD("target", 
TARGET); 465 KEYWORD("triple", TRIPLE); 466 KEYWORD("deplibs", DEPLIBS); 467 KEYWORD("datalayout", DATALAYOUT); 468 KEYWORD("volatile", VOLATILE); 469 KEYWORD("align", ALIGN); 470 KEYWORD("addrspace", ADDRSPACE); 471 KEYWORD("section", SECTION); 472 KEYWORD("alias", ALIAS); 473 KEYWORD("module", MODULE); 474 KEYWORD("asm", ASM_TOK); 475 KEYWORD("sideeffect", SIDEEFFECT); 476 KEYWORD("gc", GC); 477 478 KEYWORD("cc", CC_TOK); 479 KEYWORD("ccc", CCC_TOK); 480 KEYWORD("fastcc", FASTCC_TOK); 481 KEYWORD("coldcc", COLDCC_TOK); 482 KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); 483 KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); 484 485 KEYWORD("signext", SIGNEXT); 486 KEYWORD("zeroext", ZEROEXT); 487 KEYWORD("inreg", INREG); 488 KEYWORD("sret", SRET); 489 KEYWORD("nounwind", NOUNWIND); 490 KEYWORD("noreturn", NORETURN); 491 KEYWORD("noalias", NOALIAS); 492 KEYWORD("byval", BYVAL); 493 KEYWORD("nest", NEST); 494 KEYWORD("readnone", READNONE); 495 KEYWORD("readonly", READONLY); 496 497 KEYWORD("type", TYPE); 498 KEYWORD("opaque", OPAQUE); 499 500 KEYWORD("eq" , EQ); 501 KEYWORD("ne" , NE); 502 KEYWORD("slt", SLT); 503 KEYWORD("sgt", SGT); 504 KEYWORD("sle", SLE); 505 KEYWORD("sge", SGE); 506 KEYWORD("ult", ULT); 507 KEYWORD("ugt", UGT); 508 KEYWORD("ule", ULE); 509 KEYWORD("uge", UGE); 510 KEYWORD("oeq", OEQ); 511 KEYWORD("one", ONE); 512 KEYWORD("olt", OLT); 513 KEYWORD("ogt", OGT); 514 KEYWORD("ole", OLE); 515 KEYWORD("oge", OGE); 516 KEYWORD("ord", ORD); 517 KEYWORD("uno", UNO); 518 KEYWORD("ueq", UEQ); 519 KEYWORD("une", UNE); 520#undef KEYWORD 521 522 // Keywords for types. 
523#define TYPEKEYWORD(STR, LLVMTY, TOK) \ 524 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 525 llvmAsmlval.PrimType = LLVMTY; return TOK; } 526 TYPEKEYWORD("void", Type::VoidTy, VOID); 527 TYPEKEYWORD("float", Type::FloatTy, FLOAT); 528 TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); 529 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); 530 TYPEKEYWORD("fp128", Type::FP128Ty, FP128); 531 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); 532 TYPEKEYWORD("label", Type::LabelTy, LABEL); 533#undef TYPEKEYWORD 534 535 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is 536 // to avoid conflicting with the sext/zext instructions, below. 537 if (Len == 4 && !memcmp(StartChar, "sext", 4)) { 538 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 539 if (JustWhitespaceNewLine(CurPtr)) 540 return SIGNEXT; 541 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { 542 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 543 if (JustWhitespaceNewLine(CurPtr)) 544 return ZEROEXT; 545 } 546 547 // Keywords for instructions. 
548#define INSTKEYWORD(STR, type, Enum, TOK) \ 549 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 550 llvmAsmlval.type = Instruction::Enum; return TOK; } 551 552 INSTKEYWORD("add", BinaryOpVal, Add, ADD); 553 INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); 554 INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); 555 INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); 556 INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); 557 INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); 558 INSTKEYWORD("urem", BinaryOpVal, URem, UREM); 559 INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); 560 INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); 561 INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); 562 INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); 563 INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); 564 INSTKEYWORD("and", BinaryOpVal, And, AND); 565 INSTKEYWORD("or", BinaryOpVal, Or , OR ); 566 INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); 567 INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); 568 INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); 569 570 INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); 571 INSTKEYWORD("call", OtherOpVal, Call, CALL); 572 INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); 573 INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); 574 INSTKEYWORD("sext", CastOpVal, SExt, SEXT); 575 INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); 576 INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); 577 INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); 578 INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); 579 INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); 580 INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); 581 INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); 582 INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); 583 INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); 584 INSTKEYWORD("select", OtherOpVal, Select, SELECT); 585 INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); 586 INSTKEYWORD("ret", TermOpVal, Ret, RET); 587 INSTKEYWORD("br", TermOpVal, Br, BR); 588 INSTKEYWORD("switch", TermOpVal, Switch, 
SWITCH); 589 INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); 590 INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); 591 INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); 592 593 INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); 594 INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); 595 INSTKEYWORD("free", MemOpVal, Free, FREE); 596 INSTKEYWORD("load", MemOpVal, Load, LOAD); 597 INSTKEYWORD("store", MemOpVal, Store, STORE); 598 INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); 599 600 INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); 601 INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); 602 INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); 603 INSTKEYWORD("getresult", OtherOpVal, GetResult, GETRESULT); 604#undef INSTKEYWORD 605 606 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 607 // the CFE to avoid forcing it to deal with 64-bit numbers. 608 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 609 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { 610 int len = CurPtr-TokStart-3; 611 uint32_t bits = len * 4; 612 APInt Tmp(bits, TokStart+3, len, 16); 613 uint32_t activeBits = Tmp.getActiveBits(); 614 if (activeBits > 0 && activeBits < bits) 615 Tmp.trunc(activeBits); 616 if (Tmp.getBitWidth() > 64) { 617 llvmAsmlval.APIntVal = new APInt(Tmp); 618 return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; 619 } else if (TokStart[0] == 's') { 620 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 621 return ESINT64VAL; 622 } else { 623 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 624 return EUINT64VAL; 625 } 626 } 627 628 // If this is "cc1234", return this as just "cc". 629 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 630 CurPtr = TokStart+2; 631 return CC_TOK; 632 } 633 634 // If this starts with "call", return it as CALL. This is to support old 635 // broken .ll files. FIXME: remove this with LLVM 3.0. 
636 if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { 637 CurPtr = TokStart+4; 638 llvmAsmlval.OtherOpVal = Instruction::Call; 639 return CALL; 640 } 641 642 // Finally, if this isn't known, return just a single character. 643 CurPtr = TokStart+1; 644 return TokStart[0]; 645} 646 647 648/// Lex0x: Handle productions that start with 0x, knowing that it matches and 649/// that this is not a label: 650/// HexFPConstant 0x[0-9A-Fa-f]+ 651/// HexFP80Constant 0xK[0-9A-Fa-f]+ 652/// HexFP128Constant 0xL[0-9A-Fa-f]+ 653/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 654int LLLexer::Lex0x() { 655 CurPtr = TokStart + 2; 656 657 char Kind; 658 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { 659 Kind = *CurPtr++; 660 } else { 661 Kind = 'J'; 662 } 663 664 if (!isxdigit(CurPtr[0])) { 665 // Bad token, return it as just zero. 666 CurPtr = TokStart+1; 667 return '0'; 668 } 669 670 while (isxdigit(CurPtr[0])) 671 ++CurPtr; 672 673 if (Kind == 'J') { 674 // HexFPConstant - Floating point constant represented in IEEE format as a 675 // hexadecimal number for when exponential notation is not precise enough. 676 // Float and double only. 
677 llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); 678 return FPVAL; 679 } 680 681 uint64_t Pair[2]; 682 HexToIntPair(TokStart+3, CurPtr, Pair); 683 switch (Kind) { 684 default: assert(0 && "Unknown kind!"); 685 case 'K': 686 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 687 llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); 688 return FPVAL; 689 case 'L': 690 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 691 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); 692 return FPVAL; 693 case 'M': 694 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 695 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); 696 return FPVAL; 697 } 698} 699 700/// LexIdentifier: Handle several related productions: 701/// Label [-a-zA-Z$._0-9]+: 702/// NInteger -[0-9]+ 703/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 704/// PInteger [0-9]+ 705/// HexFPConstant 0x[0-9A-Fa-f]+ 706/// HexFP80Constant 0xK[0-9A-Fa-f]+ 707/// HexFP128Constant 0xL[0-9A-Fa-f]+ 708/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 709int LLLexer::LexDigitOrNegative() { 710 // If the letter after the negative is a number, this is probably a label. 711 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { 712 // Okay, this is not a number after the -, it's probably a label. 713 if (const char *End = isLabelTail(CurPtr)) { 714 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 715 CurPtr = End; 716 return LABELSTR; 717 } 718 719 return CurPtr[-1]; 720 } 721 722 // At this point, it is either a label, int or fp constant. 723 724 // Skip digits, we have at least one. 725 for (; isdigit(CurPtr[0]); ++CurPtr) 726 /*empty*/; 727 728 // Check to see if this really is a label afterall, e.g. "-1:". 
729 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 730 if (const char *End = isLabelTail(CurPtr)) { 731 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 732 CurPtr = End; 733 return LABELSTR; 734 } 735 } 736 737 // If the next character is a '.', then it is a fp value, otherwise its 738 // integer. 739 if (CurPtr[0] != '.') { 740 if (TokStart[0] == '0' && TokStart[1] == 'x') 741 return Lex0x(); 742 unsigned Len = CurPtr-TokStart; 743 uint32_t numBits = ((Len * 64) / 19) + 2; 744 APInt Tmp(numBits, TokStart, Len, 10); 745 if (TokStart[0] == '-') { 746 uint32_t minBits = Tmp.getMinSignedBits(); 747 if (minBits > 0 && minBits < numBits) 748 Tmp.trunc(minBits); 749 if (Tmp.getBitWidth() > 64) { 750 llvmAsmlval.APIntVal = new APInt(Tmp); 751 return ESAPINTVAL; 752 } else { 753 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 754 return ESINT64VAL; 755 } 756 } else { 757 uint32_t activeBits = Tmp.getActiveBits(); 758 if (activeBits > 0 && activeBits < numBits) 759 Tmp.trunc(activeBits); 760 if (Tmp.getBitWidth() > 64) { 761 llvmAsmlval.APIntVal = new APInt(Tmp); 762 return EUAPINTVAL; 763 } else { 764 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 765 return EUINT64VAL; 766 } 767 } 768 } 769 770 ++CurPtr; 771 772 // Skip over [0-9]*([eE][-+]?[0-9]+)? 773 while (isdigit(CurPtr[0])) ++CurPtr; 774 775 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 776 if (isdigit(CurPtr[1]) || 777 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 778 CurPtr += 2; 779 while (isdigit(CurPtr[0])) ++CurPtr; 780 } 781 } 782 783 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 784 return FPVAL; 785} 786 787/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 788int LLLexer::LexPositive() { 789 // If the letter after the negative is a number, this is probably not a 790 // label. 791 if (!isdigit(CurPtr[0])) 792 return CurPtr[-1]; 793 794 // Skip digits. 795 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 796 /*empty*/; 797 798 // At this point, we need a '.'. 
799 if (CurPtr[0] != '.') { 800 CurPtr = TokStart+1; 801 return TokStart[0]; 802 } 803 804 ++CurPtr; 805 806 // Skip over [0-9]*([eE][-+]?[0-9]+)? 807 while (isdigit(CurPtr[0])) ++CurPtr; 808 809 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 810 if (isdigit(CurPtr[1]) || 811 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 812 CurPtr += 2; 813 while (isdigit(CurPtr[0])) ++CurPtr; 814 } 815 } 816 817 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 818 return FPVAL; 819} 820 821 822//===----------------------------------------------------------------------===// 823// Define the interface to this file. 824//===----------------------------------------------------------------------===// 825 826static LLLexer *TheLexer; 827 828void InitLLLexer(llvm::MemoryBuffer *MB) { 829 assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); 830 TheLexer = new LLLexer(MB); 831} 832 833int llvmAsmlex() { 834 return TheLexer->LexToken(); 835} 836const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } 837unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } 838std::string LLLgetFilename() { return TheLexer->getFilename(); } 839unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } 840 841void FreeLexer() { 842 delete TheLexer; 843 TheLexer = 0; 844} 845