LLLexer.cpp revision aafce77b17d340aace52bcd49d1944109d82f14a
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implement the Lexer for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#include "LLLexer.h" 15#include "ParserInternals.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Support/MathExtras.h" 18 19#include <list> 20#include "llvmAsmParser.h" 21 22#include <cstring> 23using namespace llvm; 24 25//===----------------------------------------------------------------------===// 26// Helper functions. 27//===----------------------------------------------------------------------===// 28 29// atoull - Convert an ascii string of decimal digits into the unsigned long 30// long representation... this does not have to do input error checking, 31// because we know that the input will be matched by a suitable regex... 32// 33static uint64_t atoull(const char *Buffer, const char *End) { 34 uint64_t Result = 0; 35 for (; Buffer != End; Buffer++) { 36 uint64_t OldRes = Result; 37 Result *= 10; 38 Result += *Buffer-'0'; 39 if (Result < OldRes) { // Uh, oh, overflow detected!!! 40 GenerateError("constant bigger than 64 bits detected!"); 41 return 0; 42 } 43 } 44 return Result; 45} 46 47static uint64_t HexIntToVal(const char *Buffer, const char *End) { 48 uint64_t Result = 0; 49 for (; Buffer != End; ++Buffer) { 50 uint64_t OldRes = Result; 51 Result *= 16; 52 char C = *Buffer; 53 if (C >= '0' && C <= '9') 54 Result += C-'0'; 55 else if (C >= 'A' && C <= 'F') 56 Result += C-'A'+10; 57 else if (C >= 'a' && C <= 'f') 58 Result += C-'a'+10; 59 60 if (Result < OldRes) { // Uh, oh, overflow detected!!! 61 GenerateError("constant bigger than 64 bits detected!"); 62 return 0; 63 } 64 } 65 return Result; 66} 67 68// HexToFP - Convert the ascii string in hexadecimal format to the floating 69// point representation of it. 70// 71static double HexToFP(const char *Buffer, const char *End) { 72 return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double 73} 74 75static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ 76 Pair[0] = 0; 77 for (int i=0; i<16; i++, Buffer++) { 78 assert(Buffer != End); 79 Pair[0] *= 16; 80 char C = *Buffer; 81 if (C >= '0' && C <= '9') 82 Pair[0] += C-'0'; 83 else if (C >= 'A' && C <= 'F') 84 Pair[0] += C-'A'+10; 85 else if (C >= 'a' && C <= 'f') 86 Pair[0] += C-'a'+10; 87 } 88 Pair[1] = 0; 89 for (int i=0; i<16 && Buffer != End; i++, Buffer++) { 90 Pair[1] *= 16; 91 char C = *Buffer; 92 if (C >= '0' && C <= '9') 93 Pair[1] += C-'0'; 94 else if (C >= 'A' && C <= 'F') 95 Pair[1] += C-'A'+10; 96 else if (C >= 'a' && C <= 'f') 97 Pair[1] += C-'a'+10; 98 } 99 if (Buffer != End) 100 GenerateError("constant bigger than 128 bits detected!"); 101} 102 103// UnEscapeLexed - Run through the specified buffer and change \xx codes to the 104// appropriate character. 105static void UnEscapeLexed(std::string &Str) { 106 if (Str.empty()) return; 107 108 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); 109 char *BOut = Buffer; 110 for (char *BIn = Buffer; BIn != EndBuffer; ) { 111 if (BIn[0] == '\\') { 112 if (BIn < EndBuffer-1 && BIn[1] == '\\') { 113 *BOut++ = '\\'; // Two \ becomes one 114 BIn += 2; 115 } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { 116 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string 117 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number 118 BIn[3] = Tmp; // Restore character 119 BIn += 3; // Skip over handled chars 120 ++BOut; 121 } else { 122 *BOut++ = *BIn++; 123 } 124 } else { 125 *BOut++ = *BIn++; 126 } 127 } 128 Str.resize(BOut-Buffer); 129} 130 131/// isLabelChar - Return true for [-a-zA-Z$._0-9]. 132static bool isLabelChar(char C) { 133 return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; 134} 135 136 137/// isLabelTail - Return true if this pointer points to a valid end of a label. 138static const char *isLabelTail(const char *CurPtr) { 139 while (1) { 140 if (CurPtr[0] == ':') return CurPtr+1; 141 if (!isLabelChar(CurPtr[0])) return 0; 142 ++CurPtr; 143 } 144} 145 146 147 148//===----------------------------------------------------------------------===// 149// Lexer definition. 150//===----------------------------------------------------------------------===// 151 152// FIXME: REMOVE THIS. 153#define YYEOF 0 154#define YYERROR -2 155 156LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { 157 CurPtr = CurBuf->getBufferStart(); 158} 159 160std::string LLLexer::getFilename() const { 161 return CurBuf->getBufferIdentifier(); 162} 163 164int LLLexer::getNextChar() { 165 char CurChar = *CurPtr++; 166 switch (CurChar) { 167 default: return (unsigned char)CurChar; 168 case 0: 169 // A nul character in the stream is either the end of the current buffer or 170 // a random nul in the file. Disambiguate that here. 171 if (CurPtr-1 != CurBuf->getBufferEnd()) 172 return 0; // Just whitespace. 173 174 // Otherwise, return end of file. 175 --CurPtr; // Another call to lex will return EOF again. 176 return EOF; 177 case '\n': 178 case '\r': 179 // Handle the newline character by ignoring it and incrementing the line 180 // count. However, be careful about 'dos style' files with \n\r in them. 181 // Only treat a \n\r or \r\n as a single line. 182 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 183 *CurPtr != CurChar) 184 ++CurPtr; // Eat the two char newline sequence. 185 186 ++CurLineNo; 187 return '\n'; 188 } 189} 190 191 192int LLLexer::LexToken() { 193 TokStart = CurPtr; 194 195 int CurChar = getNextChar(); 196 197 switch (CurChar) { 198 default: 199 // Handle letters: [a-zA-Z_] 200 if (isalpha(CurChar) || CurChar == '_') 201 return LexIdentifier(); 202 203 return CurChar; 204 case EOF: return YYEOF; 205 case 0: 206 case ' ': 207 case '\t': 208 case '\n': 209 case '\r': 210 // Ignore whitespace. 211 return LexToken(); 212 case '+': return LexPositive(); 213 case '@': return LexAt(); 214 case '%': return LexPercent(); 215 case '"': return LexQuote(); 216 case '.': 217 if (const char *Ptr = isLabelTail(CurPtr)) { 218 CurPtr = Ptr; 219 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 220 return LABELSTR; 221 } 222 if (CurPtr[0] == '.' && CurPtr[1] == '.') { 223 CurPtr += 2; 224 return DOTDOTDOT; 225 } 226 return '.'; 227 case '$': 228 if (const char *Ptr = isLabelTail(CurPtr)) { 229 CurPtr = Ptr; 230 llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); 231 return LABELSTR; 232 } 233 return '$'; 234 case ';': 235 SkipLineComment(); 236 return LexToken(); 237 case '0': case '1': case '2': case '3': case '4': 238 case '5': case '6': case '7': case '8': case '9': 239 case '-': 240 return LexDigitOrNegative(); 241 } 242} 243 244void LLLexer::SkipLineComment() { 245 while (1) { 246 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) 247 return; 248 } 249} 250 251/// LexAt - Lex all tokens that start with an @ character: 252/// AtStringConstant @\"[^\"]*\" 253/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* 254/// GlobalVarID @[0-9]+ 255int LLLexer::LexAt() { 256 // Handle AtStringConstant: @\"[^\"]*\" 257 if (CurPtr[0] == '"') { 258 ++CurPtr; 259 260 while (1) { 261 int CurChar = getNextChar(); 262 263 if (CurChar == EOF) { 264 GenerateError("End of file in global variable name"); 265 return YYERROR; 266 } 267 if (CurChar == '"') { 268 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 269 UnEscapeLexed(*llvmAsmlval.StrVal); 270 return ATSTRINGCONSTANT; 271 } 272 } 273 } 274 275 // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* 276 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 277 CurPtr[0] == '.' || CurPtr[0] == '_') { 278 ++CurPtr; 279 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 280 CurPtr[0] == '.' || CurPtr[0] == '_') 281 ++CurPtr; 282 283 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ 284 return GLOBALVAR; 285 } 286 287 // Handle GlobalVarID: @[0-9]+ 288 if (isdigit(CurPtr[0])) { 289 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 290 /*empty*/; 291 292 uint64_t Val = atoull(TokStart+1, CurPtr); 293 if ((unsigned)Val != Val) 294 GenerateError("Invalid value number (too large)!"); 295 llvmAsmlval.UIntVal = unsigned(Val); 296 return GLOBALVAL_ID; 297 } 298 299 return '@'; 300} 301 302 303/// LexPercent - Lex all tokens that start with a % character: 304/// PctStringConstant %\"[^\"]*\" 305/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* 306/// LocalVarID %[0-9]+ 307int LLLexer::LexPercent() { 308 // Handle PctStringConstant: %\"[^\"]*\" 309 if (CurPtr[0] == '"') { 310 ++CurPtr; 311 312 while (1) { 313 int CurChar = getNextChar(); 314 315 if (CurChar == EOF) { 316 GenerateError("End of file in local variable name"); 317 return YYERROR; 318 } 319 if (CurChar == '"') { 320 llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); 321 UnEscapeLexed(*llvmAsmlval.StrVal); 322 return PCTSTRINGCONSTANT; 323 } 324 } 325 } 326 327 // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* 328 if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 329 CurPtr[0] == '.' || CurPtr[0] == '_') { 330 ++CurPtr; 331 while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || 332 CurPtr[0] == '.' || CurPtr[0] == '_') 333 ++CurPtr; 334 335 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % 336 return LOCALVAR; 337 } 338 339 // Handle LocalVarID: %[0-9]+ 340 if (isdigit(CurPtr[0])) { 341 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 342 /*empty*/; 343 344 uint64_t Val = atoull(TokStart+1, CurPtr); 345 if ((unsigned)Val != Val) 346 GenerateError("Invalid value number (too large)!"); 347 llvmAsmlval.UIntVal = unsigned(Val); 348 return LOCALVAL_ID; 349 } 350 351 return '%'; 352} 353 354/// LexQuote - Lex all tokens that start with a " character: 355/// QuoteLabel "[^"]+": 356/// StringConstant "[^"]*" 357int LLLexer::LexQuote() { 358 while (1) { 359 int CurChar = getNextChar(); 360 361 if (CurChar == EOF) { 362 GenerateError("End of file in quoted string"); 363 return YYERROR; 364 } 365 366 if (CurChar != '"') continue; 367 368 if (CurPtr[0] != ':') { 369 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); 370 UnEscapeLexed(*llvmAsmlval.StrVal); 371 return STRINGCONSTANT; 372 } 373 374 ++CurPtr; 375 llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); 376 UnEscapeLexed(*llvmAsmlval.StrVal); 377 return LABELSTR; 378 } 379} 380 381static bool JustWhitespaceNewLine(const char *&Ptr) { 382 const char *ThisPtr = Ptr; 383 while (*ThisPtr == ' ' || *ThisPtr == '\t') 384 ++ThisPtr; 385 if (*ThisPtr == '\n' || *ThisPtr == '\r') { 386 Ptr = ThisPtr; 387 return true; 388 } 389 return false; 390} 391 392 393/// LexIdentifier: Handle several related productions: 394/// Label [-a-zA-Z$._0-9]+: 395/// IntegerType i[0-9]+ 396/// Keyword sdiv, float, ... 397/// HexIntConstant [us]0x[0-9A-Fa-f]+ 398int LLLexer::LexIdentifier() { 399 const char *StartChar = CurPtr; 400 const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; 401 const char *KeywordEnd = 0; 402 403 for (; isLabelChar(*CurPtr); ++CurPtr) { 404 // If we decide this is an integer, remember the end of the sequence. 405 if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; 406 if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; 407 } 408 409 // If we stopped due to a colon, this really is a label. 410 if (*CurPtr == ':') { 411 llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); 412 return LABELSTR; 413 } 414 415 // Otherwise, this wasn't a label. If this was valid as an integer type, 416 // return it. 417 if (IntEnd == 0) IntEnd = CurPtr; 418 if (IntEnd != StartChar) { 419 CurPtr = IntEnd; 420 uint64_t NumBits = atoull(StartChar, CurPtr); 421 if (NumBits < IntegerType::MIN_INT_BITS || 422 NumBits > IntegerType::MAX_INT_BITS) { 423 GenerateError("Bitwidth for integer type out of range!"); 424 return YYERROR; 425 } 426 const Type* Ty = IntegerType::get(NumBits); 427 llvmAsmlval.PrimType = Ty; 428 return INTTYPE; 429 } 430 431 // Otherwise, this was a letter sequence. See which keyword this is. 432 if (KeywordEnd == 0) KeywordEnd = CurPtr; 433 CurPtr = KeywordEnd; 434 --StartChar; 435 unsigned Len = CurPtr-StartChar; 436#define KEYWORD(STR, TOK) \ 437 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; 438 439 KEYWORD("begin", BEGINTOK); 440 KEYWORD("end", ENDTOK); 441 KEYWORD("true", TRUETOK); 442 KEYWORD("false", FALSETOK); 443 KEYWORD("declare", DECLARE); 444 KEYWORD("define", DEFINE); 445 KEYWORD("global", GLOBAL); 446 KEYWORD("constant", CONSTANT); 447 448 KEYWORD("internal", INTERNAL); 449 KEYWORD("linkonce", LINKONCE); 450 KEYWORD("weak", WEAK); 451 KEYWORD("appending", APPENDING); 452 KEYWORD("dllimport", DLLIMPORT); 453 KEYWORD("dllexport", DLLEXPORT); 454 KEYWORD("common", COMMON); 455 KEYWORD("hidden", HIDDEN); 456 KEYWORD("protected", PROTECTED); 457 KEYWORD("extern_weak", EXTERN_WEAK); 458 KEYWORD("external", EXTERNAL); 459 KEYWORD("thread_local", THREAD_LOCAL); 460 KEYWORD("zeroinitializer", ZEROINITIALIZER); 461 KEYWORD("undef", UNDEF); 462 KEYWORD("null", NULL_TOK); 463 KEYWORD("to", TO); 464 KEYWORD("tail", TAIL); 465 KEYWORD("target", TARGET); 466 KEYWORD("triple", TRIPLE); 467 KEYWORD("deplibs", DEPLIBS); 468 KEYWORD("datalayout", DATALAYOUT); 469 KEYWORD("volatile", VOLATILE); 470 KEYWORD("align", ALIGN); 471 KEYWORD("addrspace", ADDRSPACE); 472 KEYWORD("section", SECTION); 473 KEYWORD("alias", ALIAS); 474 KEYWORD("module", MODULE); 475 KEYWORD("asm", ASM_TOK); 476 KEYWORD("sideeffect", SIDEEFFECT); 477 KEYWORD("gc", GC); 478 479 KEYWORD("cc", CC_TOK); 480 KEYWORD("ccc", CCC_TOK); 481 KEYWORD("fastcc", FASTCC_TOK); 482 KEYWORD("coldcc", COLDCC_TOK); 483 KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); 484 KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); 485 486 KEYWORD("signext", SIGNEXT); 487 KEYWORD("zeroext", ZEROEXT); 488 KEYWORD("inreg", INREG); 489 KEYWORD("sret", SRET); 490 KEYWORD("nounwind", NOUNWIND); 491 KEYWORD("noreturn", NORETURN); 492 KEYWORD("noalias", NOALIAS); 493 KEYWORD("byval", BYVAL); 494 KEYWORD("nest", NEST); 495 KEYWORD("readnone", READNONE); 496 KEYWORD("readonly", READONLY); 497 498 KEYWORD("type", TYPE); 499 KEYWORD("opaque", OPAQUE); 500 501 KEYWORD("eq" , EQ); 502 KEYWORD("ne" , NE); 503 KEYWORD("slt", SLT); 504 KEYWORD("sgt", SGT); 505 KEYWORD("sle", SLE); 506 KEYWORD("sge", SGE); 507 KEYWORD("ult", ULT); 508 KEYWORD("ugt", UGT); 509 KEYWORD("ule", ULE); 510 KEYWORD("uge", UGE); 511 KEYWORD("oeq", OEQ); 512 KEYWORD("one", ONE); 513 KEYWORD("olt", OLT); 514 KEYWORD("ogt", OGT); 515 KEYWORD("ole", OLE); 516 KEYWORD("oge", OGE); 517 KEYWORD("ord", ORD); 518 KEYWORD("uno", UNO); 519 KEYWORD("ueq", UEQ); 520 KEYWORD("une", UNE); 521#undef KEYWORD 522 523 // Keywords for types. 524#define TYPEKEYWORD(STR, LLVMTY, TOK) \ 525 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 526 llvmAsmlval.PrimType = LLVMTY; return TOK; } 527 TYPEKEYWORD("void", Type::VoidTy, VOID); 528 TYPEKEYWORD("float", Type::FloatTy, FLOAT); 529 TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); 530 TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); 531 TYPEKEYWORD("fp128", Type::FP128Ty, FP128); 532 TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); 533 TYPEKEYWORD("label", Type::LabelTy, LABEL); 534#undef TYPEKEYWORD 535 536 // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is 537 // to avoid conflicting with the sext/zext instructions, below. 538 if (Len == 4 && !memcmp(StartChar, "sext", 4)) { 539 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 540 if (JustWhitespaceNewLine(CurPtr)) 541 return SIGNEXT; 542 } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { 543 // Scan CurPtr ahead, seeing if there is just whitespace before the newline. 544 if (JustWhitespaceNewLine(CurPtr)) 545 return ZEROEXT; 546 } 547 548 // Keywords for instructions. 549#define INSTKEYWORD(STR, type, Enum, TOK) \ 550 if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ 551 llvmAsmlval.type = Instruction::Enum; return TOK; } 552 553 INSTKEYWORD("add", BinaryOpVal, Add, ADD); 554 INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); 555 INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); 556 INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); 557 INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); 558 INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); 559 INSTKEYWORD("urem", BinaryOpVal, URem, UREM); 560 INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); 561 INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); 562 INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); 563 INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); 564 INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); 565 INSTKEYWORD("and", BinaryOpVal, And, AND); 566 INSTKEYWORD("or", BinaryOpVal, Or , OR ); 567 INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); 568 INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); 569 INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); 570 INSTKEYWORD("vicmp", OtherOpVal, VICmp, VICMP); 571 INSTKEYWORD("vfcmp", OtherOpVal, VFCmp, VFCMP); 572 573 INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); 574 INSTKEYWORD("call", OtherOpVal, Call, CALL); 575 INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); 576 INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); 577 INSTKEYWORD("sext", CastOpVal, SExt, SEXT); 578 INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); 579 INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); 580 INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); 581 INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); 582 INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); 583 INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); 584 INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); 585 INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); 586 INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); 587 INSTKEYWORD("select", OtherOpVal, Select, SELECT); 588 INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); 589 INSTKEYWORD("ret", TermOpVal, Ret, RET); 590 INSTKEYWORD("br", TermOpVal, Br, BR); 591 INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); 592 INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); 593 INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); 594 INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); 595 596 INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); 597 INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); 598 INSTKEYWORD("free", MemOpVal, Free, FREE); 599 INSTKEYWORD("load", MemOpVal, Load, LOAD); 600 INSTKEYWORD("store", MemOpVal, Store, STORE); 601 INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); 602 603 INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); 604 INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); 605 INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); 606 INSTKEYWORD("getresult", OtherOpVal, GetResult, GETRESULT); 607#undef INSTKEYWORD 608 609 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by 610 // the CFE to avoid forcing it to deal with 64-bit numbers. 611 if ((TokStart[0] == 'u' || TokStart[0] == 's') && 612 TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { 613 int len = CurPtr-TokStart-3; 614 uint32_t bits = len * 4; 615 APInt Tmp(bits, TokStart+3, len, 16); 616 uint32_t activeBits = Tmp.getActiveBits(); 617 if (activeBits > 0 && activeBits < bits) 618 Tmp.trunc(activeBits); 619 if (Tmp.getBitWidth() > 64) { 620 llvmAsmlval.APIntVal = new APInt(Tmp); 621 return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; 622 } else if (TokStart[0] == 's') { 623 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 624 return ESINT64VAL; 625 } else { 626 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 627 return EUINT64VAL; 628 } 629 } 630 631 // If this is "cc1234", return this as just "cc". 632 if (TokStart[0] == 'c' && TokStart[1] == 'c') { 633 CurPtr = TokStart+2; 634 return CC_TOK; 635 } 636 637 // If this starts with "call", return it as CALL. This is to support old 638 // broken .ll files. FIXME: remove this with LLVM 3.0. 639 if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { 640 CurPtr = TokStart+4; 641 llvmAsmlval.OtherOpVal = Instruction::Call; 642 return CALL; 643 } 644 645 // Finally, if this isn't known, return just a single character. 646 CurPtr = TokStart+1; 647 return TokStart[0]; 648} 649 650 651/// Lex0x: Handle productions that start with 0x, knowing that it matches and 652/// that this is not a label: 653/// HexFPConstant 0x[0-9A-Fa-f]+ 654/// HexFP80Constant 0xK[0-9A-Fa-f]+ 655/// HexFP128Constant 0xL[0-9A-Fa-f]+ 656/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 657int LLLexer::Lex0x() { 658 CurPtr = TokStart + 2; 659 660 char Kind; 661 if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { 662 Kind = *CurPtr++; 663 } else { 664 Kind = 'J'; 665 } 666 667 if (!isxdigit(CurPtr[0])) { 668 // Bad token, return it as just zero. 669 CurPtr = TokStart+1; 670 return '0'; 671 } 672 673 while (isxdigit(CurPtr[0])) 674 ++CurPtr; 675 676 if (Kind == 'J') { 677 // HexFPConstant - Floating point constant represented in IEEE format as a 678 // hexadecimal number for when exponential notation is not precise enough. 679 // Float and double only. 680 llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); 681 return FPVAL; 682 } 683 684 uint64_t Pair[2]; 685 HexToIntPair(TokStart+3, CurPtr, Pair); 686 switch (Kind) { 687 default: assert(0 && "Unknown kind!"); 688 case 'K': 689 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) 690 llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); 691 return FPVAL; 692 case 'L': 693 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) 694 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); 695 return FPVAL; 696 case 'M': 697 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) 698 llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); 699 return FPVAL; 700 } 701} 702 703/// LexIdentifier: Handle several related productions: 704/// Label [-a-zA-Z$._0-9]+: 705/// NInteger -[0-9]+ 706/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 707/// PInteger [0-9]+ 708/// HexFPConstant 0x[0-9A-Fa-f]+ 709/// HexFP80Constant 0xK[0-9A-Fa-f]+ 710/// HexFP128Constant 0xL[0-9A-Fa-f]+ 711/// HexPPC128Constant 0xM[0-9A-Fa-f]+ 712int LLLexer::LexDigitOrNegative() { 713 // If the letter after the negative is a number, this is probably a label. 714 if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { 715 // Okay, this is not a number after the -, it's probably a label. 716 if (const char *End = isLabelTail(CurPtr)) { 717 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 718 CurPtr = End; 719 return LABELSTR; 720 } 721 722 return CurPtr[-1]; 723 } 724 725 // At this point, it is either a label, int or fp constant. 726 727 // Skip digits, we have at least one. 728 for (; isdigit(CurPtr[0]); ++CurPtr) 729 /*empty*/; 730 731 // Check to see if this really is a label afterall, e.g. "-1:". 732 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { 733 if (const char *End = isLabelTail(CurPtr)) { 734 llvmAsmlval.StrVal = new std::string(TokStart, End-1); 735 CurPtr = End; 736 return LABELSTR; 737 } 738 } 739 740 // If the next character is a '.', then it is a fp value, otherwise its 741 // integer. 742 if (CurPtr[0] != '.') { 743 if (TokStart[0] == '0' && TokStart[1] == 'x') 744 return Lex0x(); 745 unsigned Len = CurPtr-TokStart; 746 uint32_t numBits = ((Len * 64) / 19) + 2; 747 APInt Tmp(numBits, TokStart, Len, 10); 748 if (TokStart[0] == '-') { 749 uint32_t minBits = Tmp.getMinSignedBits(); 750 if (minBits > 0 && minBits < numBits) 751 Tmp.trunc(minBits); 752 if (Tmp.getBitWidth() > 64) { 753 llvmAsmlval.APIntVal = new APInt(Tmp); 754 return ESAPINTVAL; 755 } else { 756 llvmAsmlval.SInt64Val = Tmp.getSExtValue(); 757 return ESINT64VAL; 758 } 759 } else { 760 uint32_t activeBits = Tmp.getActiveBits(); 761 if (activeBits > 0 && activeBits < numBits) 762 Tmp.trunc(activeBits); 763 if (Tmp.getBitWidth() > 64) { 764 llvmAsmlval.APIntVal = new APInt(Tmp); 765 return EUAPINTVAL; 766 } else { 767 llvmAsmlval.UInt64Val = Tmp.getZExtValue(); 768 return EUINT64VAL; 769 } 770 } 771 } 772 773 ++CurPtr; 774 775 // Skip over [0-9]*([eE][-+]?[0-9]+)? 776 while (isdigit(CurPtr[0])) ++CurPtr; 777 778 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 779 if (isdigit(CurPtr[1]) || 780 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 781 CurPtr += 2; 782 while (isdigit(CurPtr[0])) ++CurPtr; 783 } 784 } 785 786 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 787 return FPVAL; 788} 789 790/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 791int LLLexer::LexPositive() { 792 // If the letter after the negative is a number, this is probably not a 793 // label. 794 if (!isdigit(CurPtr[0])) 795 return CurPtr[-1]; 796 797 // Skip digits. 798 for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) 799 /*empty*/; 800 801 // At this point, we need a '.'. 802 if (CurPtr[0] != '.') { 803 CurPtr = TokStart+1; 804 return TokStart[0]; 805 } 806 807 ++CurPtr; 808 809 // Skip over [0-9]*([eE][-+]?[0-9]+)? 810 while (isdigit(CurPtr[0])) ++CurPtr; 811 812 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { 813 if (isdigit(CurPtr[1]) || 814 ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { 815 CurPtr += 2; 816 while (isdigit(CurPtr[0])) ++CurPtr; 817 } 818 } 819 820 llvmAsmlval.FPVal = new APFloat(atof(TokStart)); 821 return FPVAL; 822} 823 824 825//===----------------------------------------------------------------------===// 826// Define the interface to this file. 827//===----------------------------------------------------------------------===// 828 829static LLLexer *TheLexer; 830 831void InitLLLexer(llvm::MemoryBuffer *MB) { 832 assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); 833 TheLexer = new LLLexer(MB); 834} 835 836int llvmAsmlex() { 837 return TheLexer->LexToken(); 838} 839const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } 840unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } 841std::string LLLgetFilename() { return TheLexer->getFilename(); } 842unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } 843 844void FreeLexer() { 845 delete TheLexer; 846 TheLexer = 0; 847} 848