1#include "llvm/IR/Verifier.h" 2#include "llvm/IR/DerivedTypes.h" 3#include "llvm/IR/IRBuilder.h" 4#include "llvm/IR/LLVMContext.h" 5#include "llvm/IR/Module.h" 6#include <cctype> 7#include <cstdio> 8#include <map> 9#include <string> 10#include <vector> 11using namespace llvm; 12 13//===----------------------------------------------------------------------===// 14// Lexer 15//===----------------------------------------------------------------------===// 16 17// The lexer returns tokens [0-255] if it is an unknown character, otherwise one 18// of these for known things. 19enum Token { 20 tok_eof = -1, 21 22 // commands 23 tok_def = -2, tok_extern = -3, 24 25 // primary 26 tok_identifier = -4, tok_number = -5 27}; 28 29static std::string IdentifierStr; // Filled in if tok_identifier 30static double NumVal; // Filled in if tok_number 31 32/// gettok - Return the next token from standard input. 33static int gettok() { 34 static int LastChar = ' '; 35 36 // Skip any whitespace. 37 while (isspace(LastChar)) 38 LastChar = getchar(); 39 40 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 41 IdentifierStr = LastChar; 42 while (isalnum((LastChar = getchar()))) 43 IdentifierStr += LastChar; 44 45 if (IdentifierStr == "def") return tok_def; 46 if (IdentifierStr == "extern") return tok_extern; 47 return tok_identifier; 48 } 49 50 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 51 std::string NumStr; 52 do { 53 NumStr += LastChar; 54 LastChar = getchar(); 55 } while (isdigit(LastChar) || LastChar == '.'); 56 57 NumVal = strtod(NumStr.c_str(), 0); 58 return tok_number; 59 } 60 61 if (LastChar == '#') { 62 // Comment until end of line. 63 do LastChar = getchar(); 64 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 65 66 if (LastChar != EOF) 67 return gettok(); 68 } 69 70 // Check for end of file. Don't eat the EOF. 71 if (LastChar == EOF) 72 return tok_eof; 73 74 // Otherwise, just return the character as its ascii value. 75 int ThisChar = LastChar; 76 LastChar = getchar(); 77 return ThisChar; 78} 79 80//===----------------------------------------------------------------------===// 81// Abstract Syntax Tree (aka Parse Tree) 82//===----------------------------------------------------------------------===// 83namespace { 84/// ExprAST - Base class for all expression nodes. 85class ExprAST { 86public: 87 virtual ~ExprAST() {} 88 virtual Value *Codegen() = 0; 89}; 90 91/// NumberExprAST - Expression class for numeric literals like "1.0". 92class NumberExprAST : public ExprAST { 93 double Val; 94public: 95 NumberExprAST(double val) : Val(val) {} 96 virtual Value *Codegen(); 97}; 98 99/// VariableExprAST - Expression class for referencing a variable, like "a". 100class VariableExprAST : public ExprAST { 101 std::string Name; 102public: 103 VariableExprAST(const std::string &name) : Name(name) {} 104 virtual Value *Codegen(); 105}; 106 107/// BinaryExprAST - Expression class for a binary operator. 108class BinaryExprAST : public ExprAST { 109 char Op; 110 ExprAST *LHS, *RHS; 111public: 112 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 113 : Op(op), LHS(lhs), RHS(rhs) {} 114 virtual Value *Codegen(); 115}; 116 117/// CallExprAST - Expression class for function calls. 118class CallExprAST : public ExprAST { 119 std::string Callee; 120 std::vector<ExprAST*> Args; 121public: 122 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 123 : Callee(callee), Args(args) {} 124 virtual Value *Codegen(); 125}; 126 127/// PrototypeAST - This class represents the "prototype" for a function, 128/// which captures its name, and its argument names (thus implicitly the number 129/// of arguments the function takes). 130class PrototypeAST { 131 std::string Name; 132 std::vector<std::string> Args; 133public: 134 PrototypeAST(const std::string &name, const std::vector<std::string> &args) 135 : Name(name), Args(args) {} 136 137 Function *Codegen(); 138}; 139 140/// FunctionAST - This class represents a function definition itself. 141class FunctionAST { 142 PrototypeAST *Proto; 143 ExprAST *Body; 144public: 145 FunctionAST(PrototypeAST *proto, ExprAST *body) 146 : Proto(proto), Body(body) {} 147 148 Function *Codegen(); 149}; 150} // end anonymous namespace 151 152//===----------------------------------------------------------------------===// 153// Parser 154//===----------------------------------------------------------------------===// 155 156/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 157/// token the parser is looking at. getNextToken reads another token from the 158/// lexer and updates CurTok with its results. 159static int CurTok; 160static int getNextToken() { 161 return CurTok = gettok(); 162} 163 164/// BinopPrecedence - This holds the precedence for each binary operator that is 165/// defined. 166static std::map<char, int> BinopPrecedence; 167 168/// GetTokPrecedence - Get the precedence of the pending binary operator token. 169static int GetTokPrecedence() { 170 if (!isascii(CurTok)) 171 return -1; 172 173 // Make sure it's a declared binop. 174 int TokPrec = BinopPrecedence[CurTok]; 175 if (TokPrec <= 0) return -1; 176 return TokPrec; 177} 178 179/// Error* - These are little helper functions for error handling. 180ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 181PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 182FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } 183 184static ExprAST *ParseExpression(); 185 186/// identifierexpr 187/// ::= identifier 188/// ::= identifier '(' expression* ')' 189static ExprAST *ParseIdentifierExpr() { 190 std::string IdName = IdentifierStr; 191 192 getNextToken(); // eat identifier. 193 194 if (CurTok != '(') // Simple variable ref. 195 return new VariableExprAST(IdName); 196 197 // Call. 198 getNextToken(); // eat ( 199 std::vector<ExprAST*> Args; 200 if (CurTok != ')') { 201 while (1) { 202 ExprAST *Arg = ParseExpression(); 203 if (!Arg) return 0; 204 Args.push_back(Arg); 205 206 if (CurTok == ')') break; 207 208 if (CurTok != ',') 209 return Error("Expected ')' or ',' in argument list"); 210 getNextToken(); 211 } 212 } 213 214 // Eat the ')'. 215 getNextToken(); 216 217 return new CallExprAST(IdName, Args); 218} 219 220/// numberexpr ::= number 221static ExprAST *ParseNumberExpr() { 222 ExprAST *Result = new NumberExprAST(NumVal); 223 getNextToken(); // consume the number 224 return Result; 225} 226 227/// parenexpr ::= '(' expression ')' 228static ExprAST *ParseParenExpr() { 229 getNextToken(); // eat (. 230 ExprAST *V = ParseExpression(); 231 if (!V) return 0; 232 233 if (CurTok != ')') 234 return Error("expected ')'"); 235 getNextToken(); // eat ). 236 return V; 237} 238 239/// primary 240/// ::= identifierexpr 241/// ::= numberexpr 242/// ::= parenexpr 243static ExprAST *ParsePrimary() { 244 switch (CurTok) { 245 default: return Error("unknown token when expecting an expression"); 246 case tok_identifier: return ParseIdentifierExpr(); 247 case tok_number: return ParseNumberExpr(); 248 case '(': return ParseParenExpr(); 249 } 250} 251 252/// binoprhs 253/// ::= ('+' primary)* 254static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 255 // If this is a binop, find its precedence. 256 while (1) { 257 int TokPrec = GetTokPrecedence(); 258 259 // If this is a binop that binds at least as tightly as the current binop, 260 // consume it, otherwise we are done. 261 if (TokPrec < ExprPrec) 262 return LHS; 263 264 // Okay, we know this is a binop. 265 int BinOp = CurTok; 266 getNextToken(); // eat binop 267 268 // Parse the primary expression after the binary operator. 269 ExprAST *RHS = ParsePrimary(); 270 if (!RHS) return 0; 271 272 // If BinOp binds less tightly with RHS than the operator after RHS, let 273 // the pending operator take RHS as its LHS. 274 int NextPrec = GetTokPrecedence(); 275 if (TokPrec < NextPrec) { 276 RHS = ParseBinOpRHS(TokPrec+1, RHS); 277 if (RHS == 0) return 0; 278 } 279 280 // Merge LHS/RHS. 281 LHS = new BinaryExprAST(BinOp, LHS, RHS); 282 } 283} 284 285/// expression 286/// ::= primary binoprhs 287/// 288static ExprAST *ParseExpression() { 289 ExprAST *LHS = ParsePrimary(); 290 if (!LHS) return 0; 291 292 return ParseBinOpRHS(0, LHS); 293} 294 295/// prototype 296/// ::= id '(' id* ')' 297static PrototypeAST *ParsePrototype() { 298 if (CurTok != tok_identifier) 299 return ErrorP("Expected function name in prototype"); 300 301 std::string FnName = IdentifierStr; 302 getNextToken(); 303 304 if (CurTok != '(') 305 return ErrorP("Expected '(' in prototype"); 306 307 std::vector<std::string> ArgNames; 308 while (getNextToken() == tok_identifier) 309 ArgNames.push_back(IdentifierStr); 310 if (CurTok != ')') 311 return ErrorP("Expected ')' in prototype"); 312 313 // success. 314 getNextToken(); // eat ')'. 315 316 return new PrototypeAST(FnName, ArgNames); 317} 318 319/// definition ::= 'def' prototype expression 320static FunctionAST *ParseDefinition() { 321 getNextToken(); // eat def. 322 PrototypeAST *Proto = ParsePrototype(); 323 if (Proto == 0) return 0; 324 325 if (ExprAST *E = ParseExpression()) 326 return new FunctionAST(Proto, E); 327 return 0; 328} 329 330/// toplevelexpr ::= expression 331static FunctionAST *ParseTopLevelExpr() { 332 if (ExprAST *E = ParseExpression()) { 333 // Make an anonymous proto. 334 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 335 return new FunctionAST(Proto, E); 336 } 337 return 0; 338} 339 340/// external ::= 'extern' prototype 341static PrototypeAST *ParseExtern() { 342 getNextToken(); // eat extern. 343 return ParsePrototype(); 344} 345 346//===----------------------------------------------------------------------===// 347// Code Generation 348//===----------------------------------------------------------------------===// 349 350static Module *TheModule; 351static IRBuilder<> Builder(getGlobalContext()); 352static std::map<std::string, Value*> NamedValues; 353 354Value *ErrorV(const char *Str) { Error(Str); return 0; } 355 356Value *NumberExprAST::Codegen() { 357 return ConstantFP::get(getGlobalContext(), APFloat(Val)); 358} 359 360Value *VariableExprAST::Codegen() { 361 // Look this variable up in the function. 362 Value *V = NamedValues[Name]; 363 return V ? V : ErrorV("Unknown variable name"); 364} 365 366Value *BinaryExprAST::Codegen() { 367 Value *L = LHS->Codegen(); 368 Value *R = RHS->Codegen(); 369 if (L == 0 || R == 0) return 0; 370 371 switch (Op) { 372 case '+': return Builder.CreateFAdd(L, R, "addtmp"); 373 case '-': return Builder.CreateFSub(L, R, "subtmp"); 374 case '*': return Builder.CreateFMul(L, R, "multmp"); 375 case '<': 376 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 377 // Convert bool 0/1 to double 0.0 or 1.0 378 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), 379 "booltmp"); 380 default: return ErrorV("invalid binary operator"); 381 } 382} 383 384Value *CallExprAST::Codegen() { 385 // Look up the name in the global module table. 386 Function *CalleeF = TheModule->getFunction(Callee); 387 if (CalleeF == 0) 388 return ErrorV("Unknown function referenced"); 389 390 // If argument mismatch error. 391 if (CalleeF->arg_size() != Args.size()) 392 return ErrorV("Incorrect # arguments passed"); 393 394 std::vector<Value*> ArgsV; 395 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 396 ArgsV.push_back(Args[i]->Codegen()); 397 if (ArgsV.back() == 0) return 0; 398 } 399 400 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 401} 402 403Function *PrototypeAST::Codegen() { 404 // Make the function type: double(double,double) etc. 405 std::vector<Type*> Doubles(Args.size(), 406 Type::getDoubleTy(getGlobalContext())); 407 FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), 408 Doubles, false); 409 410 Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); 411 412 // If F conflicted, there was already something named 'Name'. If it has a 413 // body, don't allow redefinition or reextern. 414 if (F->getName() != Name) { 415 // Delete the one we just made and get the existing one. 416 F->eraseFromParent(); 417 F = TheModule->getFunction(Name); 418 419 // If F already has a body, reject this. 420 if (!F->empty()) { 421 ErrorF("redefinition of function"); 422 return 0; 423 } 424 425 // If F took a different number of args, reject. 426 if (F->arg_size() != Args.size()) { 427 ErrorF("redefinition of function with different # args"); 428 return 0; 429 } 430 } 431 432 // Set names for all arguments. 433 unsigned Idx = 0; 434 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); 435 ++AI, ++Idx) { 436 AI->setName(Args[Idx]); 437 438 // Add arguments to variable symbol table. 439 NamedValues[Args[Idx]] = AI; 440 } 441 442 return F; 443} 444 445Function *FunctionAST::Codegen() { 446 NamedValues.clear(); 447 448 Function *TheFunction = Proto->Codegen(); 449 if (TheFunction == 0) 450 return 0; 451 452 // Create a new basic block to start insertion into. 453 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); 454 Builder.SetInsertPoint(BB); 455 456 if (Value *RetVal = Body->Codegen()) { 457 // Finish off the function. 458 Builder.CreateRet(RetVal); 459 460 // Validate the generated code, checking for consistency. 461 verifyFunction(*TheFunction); 462 463 return TheFunction; 464 } 465 466 // Error reading body, remove function. 467 TheFunction->eraseFromParent(); 468 return 0; 469} 470 471//===----------------------------------------------------------------------===// 472// Top-Level parsing and JIT Driver 473//===----------------------------------------------------------------------===// 474 475static void HandleDefinition() { 476 if (FunctionAST *F = ParseDefinition()) { 477 if (Function *LF = F->Codegen()) { 478 fprintf(stderr, "Read function definition:"); 479 LF->dump(); 480 } 481 } else { 482 // Skip token for error recovery. 483 getNextToken(); 484 } 485} 486 487static void HandleExtern() { 488 if (PrototypeAST *P = ParseExtern()) { 489 if (Function *F = P->Codegen()) { 490 fprintf(stderr, "Read extern: "); 491 F->dump(); 492 } 493 } else { 494 // Skip token for error recovery. 495 getNextToken(); 496 } 497} 498 499static void HandleTopLevelExpression() { 500 // Evaluate a top-level expression into an anonymous function. 501 if (FunctionAST *F = ParseTopLevelExpr()) { 502 if (Function *LF = F->Codegen()) { 503 fprintf(stderr, "Read top-level expression:"); 504 LF->dump(); 505 } 506 } else { 507 // Skip token for error recovery. 508 getNextToken(); 509 } 510} 511 512/// top ::= definition | external | expression | ';' 513static void MainLoop() { 514 while (1) { 515 fprintf(stderr, "ready> "); 516 switch (CurTok) { 517 case tok_eof: return; 518 case ';': getNextToken(); break; // ignore top-level semicolons. 519 case tok_def: HandleDefinition(); break; 520 case tok_extern: HandleExtern(); break; 521 default: HandleTopLevelExpression(); break; 522 } 523 } 524} 525 526//===----------------------------------------------------------------------===// 527// "Library" functions that can be "extern'd" from user code. 528//===----------------------------------------------------------------------===// 529 530/// putchard - putchar that takes a double and returns 0. 531extern "C" 532double putchard(double X) { 533 putchar((char)X); 534 return 0; 535} 536 537//===----------------------------------------------------------------------===// 538// Main driver code. 539//===----------------------------------------------------------------------===// 540 541int main() { 542 LLVMContext &Context = getGlobalContext(); 543 544 // Install standard binary operators. 545 // 1 is lowest precedence. 546 BinopPrecedence['<'] = 10; 547 BinopPrecedence['+'] = 20; 548 BinopPrecedence['-'] = 20; 549 BinopPrecedence['*'] = 40; // highest. 550 551 // Prime the first token. 552 fprintf(stderr, "ready> "); 553 getNextToken(); 554 555 // Make the module, which holds all the code. 556 TheModule = new Module("my cool jit", Context); 557 558 // Run the main "interpreter loop" now. 559 MainLoop(); 560 561 // Print out all of the generated code. 562 TheModule->dump(); 563 564 return 0; 565} 566