toy-jit.cpp revision dbbbccc3492aa7f91f21d8902cfb0b766dabb849
1#define MINIMAL_STDERR_OUTPUT
2
3#include "llvm/Analysis/Passes.h"
4#include "llvm/Analysis/Verifier.h"
5#include "llvm/ExecutionEngine/ExecutionEngine.h"
6#include "llvm/ExecutionEngine/JIT.h"
7#include "llvm/IR/DataLayout.h"
8#include "llvm/IR/DerivedTypes.h"
9#include "llvm/IR/IRBuilder.h"
10#include "llvm/IR/LLVMContext.h"
11#include "llvm/IR/Module.h"
12#include "llvm/IRReader/IRReader.h"
13#include "llvm/PassManager.h"
14#include "llvm/Support/CommandLine.h"
15#include "llvm/Support/raw_ostream.h"
16#include "llvm/Support/SourceMgr.h"
17#include "llvm/Support/TargetSelect.h"
18#include "llvm/Transforms/Scalar.h"
19#include <cstdio>
20#include <map>
21#include <string>
22#include <vector>
23
24using namespace llvm;
25
26//===----------------------------------------------------------------------===//
27// Command-line options
28//===----------------------------------------------------------------------===//
29
30namespace {
31  cl::opt<std::string>
32  InputIR("input-IR",
33              cl::desc("Specify the name of an IR file to load for function definitions"),
34              cl::value_desc("input IR file name"));
35} // namespace
36
37//===----------------------------------------------------------------------===//
38// Lexer
39//===----------------------------------------------------------------------===//
40
// Token codes produced by the lexer.  Characters the lexer does not recognise
// are returned as their own value in [0-255]; everything it does recognise is
// reported with one of the negative codes below.
enum Token {
  // end of input
  tok_eof        = -1,

  // commands
  tok_def        = -2,
  tok_extern     = -3,

  // primary
  tok_identifier = -4,
  tok_number     = -5,

  // control flow
  tok_if         = -6,
  tok_then       = -7,
  tok_else       = -8,
  tok_for        = -9,
  tok_in         = -10,

  // user-defined operator declarations
  tok_binary     = -11,
  tok_unary      = -12,

  // local variable definition
  tok_var        = -13
};
62
63static std::string IdentifierStr;  // Filled in if tok_identifier
64static double NumVal;              // Filled in if tok_number
65
66/// gettok - Return the next token from standard input.
67static int gettok() {
68  static int LastChar = ' ';
69
70  // Skip any whitespace.
71  while (isspace(LastChar))
72    LastChar = getchar();
73
74  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
75    IdentifierStr = LastChar;
76    while (isalnum((LastChar = getchar())))
77      IdentifierStr += LastChar;
78
79    if (IdentifierStr == "def") return tok_def;
80    if (IdentifierStr == "extern") return tok_extern;
81    if (IdentifierStr == "if") return tok_if;
82    if (IdentifierStr == "then") return tok_then;
83    if (IdentifierStr == "else") return tok_else;
84    if (IdentifierStr == "for") return tok_for;
85    if (IdentifierStr == "in") return tok_in;
86    if (IdentifierStr == "binary") return tok_binary;
87    if (IdentifierStr == "unary") return tok_unary;
88    if (IdentifierStr == "var") return tok_var;
89    return tok_identifier;
90  }
91
92  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
93    std::string NumStr;
94    do {
95      NumStr += LastChar;
96      LastChar = getchar();
97    } while (isdigit(LastChar) || LastChar == '.');
98
99    NumVal = strtod(NumStr.c_str(), 0);
100    return tok_number;
101  }
102
103  if (LastChar == '#') {
104    // Comment until end of line.
105    do LastChar = getchar();
106    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
107
108    if (LastChar != EOF)
109      return gettok();
110  }
111
112  // Check for end of file.  Don't eat the EOF.
113  if (LastChar == EOF)
114    return tok_eof;
115
116  // Otherwise, just return the character as its ascii value.
117  int ThisChar = LastChar;
118  LastChar = getchar();
119  return ThisChar;
120}
121
122//===----------------------------------------------------------------------===//
123// Abstract Syntax Tree (aka Parse Tree)
124//===----------------------------------------------------------------------===//
125
126/// ExprAST - Base class for all expression nodes.
127class ExprAST {
128public:
129  virtual ~ExprAST() {}
130  virtual Value *Codegen() = 0;
131};
132
133/// NumberExprAST - Expression class for numeric literals like "1.0".
134class NumberExprAST : public ExprAST {
135  double Val;
136public:
137  NumberExprAST(double val) : Val(val) {}
138  virtual Value *Codegen();
139};
140
141/// VariableExprAST - Expression class for referencing a variable, like "a".
142class VariableExprAST : public ExprAST {
143  std::string Name;
144public:
145  VariableExprAST(const std::string &name) : Name(name) {}
146  const std::string &getName() const { return Name; }
147  virtual Value *Codegen();
148};
149
150/// UnaryExprAST - Expression class for a unary operator.
151class UnaryExprAST : public ExprAST {
152  char Opcode;
153  ExprAST *Operand;
154public:
155  UnaryExprAST(char opcode, ExprAST *operand)
156    : Opcode(opcode), Operand(operand) {}
157  virtual Value *Codegen();
158};
159
160/// BinaryExprAST - Expression class for a binary operator.
161class BinaryExprAST : public ExprAST {
162  char Op;
163  ExprAST *LHS, *RHS;
164public:
165  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
166    : Op(op), LHS(lhs), RHS(rhs) {}
167  virtual Value *Codegen();
168};
169
170/// CallExprAST - Expression class for function calls.
171class CallExprAST : public ExprAST {
172  std::string Callee;
173  std::vector<ExprAST*> Args;
174public:
175  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
176    : Callee(callee), Args(args) {}
177  virtual Value *Codegen();
178};
179
180/// IfExprAST - Expression class for if/then/else.
181class IfExprAST : public ExprAST {
182  ExprAST *Cond, *Then, *Else;
183public:
184  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
185  : Cond(cond), Then(then), Else(_else) {}
186  virtual Value *Codegen();
187};
188
189/// ForExprAST - Expression class for for/in.
190class ForExprAST : public ExprAST {
191  std::string VarName;
192  ExprAST *Start, *End, *Step, *Body;
193public:
194  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
195             ExprAST *step, ExprAST *body)
196    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
197  virtual Value *Codegen();
198};
199
200/// VarExprAST - Expression class for var/in
201class VarExprAST : public ExprAST {
202  std::vector<std::pair<std::string, ExprAST*> > VarNames;
203  ExprAST *Body;
204public:
205  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
206             ExprAST *body)
207  : VarNames(varnames), Body(body) {}
208
209  virtual Value *Codegen();
210};
211
212/// PrototypeAST - This class represents the "prototype" for a function,
213/// which captures its argument names as well as if it is an operator.
214class PrototypeAST {
215  std::string Name;
216  std::vector<std::string> Args;
217  bool isOperator;
218  unsigned Precedence;  // Precedence if a binary op.
219public:
220  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
221               bool isoperator = false, unsigned prec = 0)
222  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
223
224  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
225  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
226
227  char getOperatorName() const {
228    assert(isUnaryOp() || isBinaryOp());
229    return Name[Name.size()-1];
230  }
231
232  unsigned getBinaryPrecedence() const { return Precedence; }
233
234  Function *Codegen();
235
236  void CreateArgumentAllocas(Function *F);
237};
238
239/// FunctionAST - This class represents a function definition itself.
240class FunctionAST {
241  PrototypeAST *Proto;
242  ExprAST *Body;
243public:
244  FunctionAST(PrototypeAST *proto, ExprAST *body)
245    : Proto(proto), Body(body) {}
246
247  Function *Codegen();
248};
249
250//===----------------------------------------------------------------------===//
251// Parser
252//===----------------------------------------------------------------------===//
253
254/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
255/// token the parser is looking at.  getNextToken reads another token from the
256/// lexer and updates CurTok with its results.
257static int CurTok;
258static int getNextToken() {
259  return CurTok = gettok();
260}
261
262/// BinopPrecedence - This holds the precedence for each binary operator that is
263/// defined.
264static std::map<char, int> BinopPrecedence;
265
266/// GetTokPrecedence - Get the precedence of the pending binary operator token.
267static int GetTokPrecedence() {
268  if (!isascii(CurTok))
269    return -1;
270
271  // Make sure it's a declared binop.
272  int TokPrec = BinopPrecedence[CurTok];
273  if (TokPrec <= 0) return -1;
274  return TokPrec;
275}
276
277/// Error* - These are little helper functions for error handling.
278ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
279PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
280FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
281
282static ExprAST *ParseExpression();
283
284/// identifierexpr
285///   ::= identifier
286///   ::= identifier '(' expression* ')'
287static ExprAST *ParseIdentifierExpr() {
288  std::string IdName = IdentifierStr;
289
290  getNextToken();  // eat identifier.
291
292  if (CurTok != '(') // Simple variable ref.
293    return new VariableExprAST(IdName);
294
295  // Call.
296  getNextToken();  // eat (
297  std::vector<ExprAST*> Args;
298  if (CurTok != ')') {
299    while (1) {
300      ExprAST *Arg = ParseExpression();
301      if (!Arg) return 0;
302      Args.push_back(Arg);
303
304      if (CurTok == ')') break;
305
306      if (CurTok != ',')
307        return Error("Expected ')' or ',' in argument list");
308      getNextToken();
309    }
310  }
311
312  // Eat the ')'.
313  getNextToken();
314
315  return new CallExprAST(IdName, Args);
316}
317
318/// numberexpr ::= number
319static ExprAST *ParseNumberExpr() {
320  ExprAST *Result = new NumberExprAST(NumVal);
321  getNextToken(); // consume the number
322  return Result;
323}
324
325/// parenexpr ::= '(' expression ')'
326static ExprAST *ParseParenExpr() {
327  getNextToken();  // eat (.
328  ExprAST *V = ParseExpression();
329  if (!V) return 0;
330
331  if (CurTok != ')')
332    return Error("expected ')'");
333  getNextToken();  // eat ).
334  return V;
335}
336
337/// ifexpr ::= 'if' expression 'then' expression 'else' expression
338static ExprAST *ParseIfExpr() {
339  getNextToken();  // eat the if.
340
341  // condition.
342  ExprAST *Cond = ParseExpression();
343  if (!Cond) return 0;
344
345  if (CurTok != tok_then)
346    return Error("expected then");
347  getNextToken();  // eat the then
348
349  ExprAST *Then = ParseExpression();
350  if (Then == 0) return 0;
351
352  if (CurTok != tok_else)
353    return Error("expected else");
354
355  getNextToken();
356
357  ExprAST *Else = ParseExpression();
358  if (!Else) return 0;
359
360  return new IfExprAST(Cond, Then, Else);
361}
362
363/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
364static ExprAST *ParseForExpr() {
365  getNextToken();  // eat the for.
366
367  if (CurTok != tok_identifier)
368    return Error("expected identifier after for");
369
370  std::string IdName = IdentifierStr;
371  getNextToken();  // eat identifier.
372
373  if (CurTok != '=')
374    return Error("expected '=' after for");
375  getNextToken();  // eat '='.
376
377
378  ExprAST *Start = ParseExpression();
379  if (Start == 0) return 0;
380  if (CurTok != ',')
381    return Error("expected ',' after for start value");
382  getNextToken();
383
384  ExprAST *End = ParseExpression();
385  if (End == 0) return 0;
386
387  // The step value is optional.
388  ExprAST *Step = 0;
389  if (CurTok == ',') {
390    getNextToken();
391    Step = ParseExpression();
392    if (Step == 0) return 0;
393  }
394
395  if (CurTok != tok_in)
396    return Error("expected 'in' after for");
397  getNextToken();  // eat 'in'.
398
399  ExprAST *Body = ParseExpression();
400  if (Body == 0) return 0;
401
402  return new ForExprAST(IdName, Start, End, Step, Body);
403}
404
405/// varexpr ::= 'var' identifier ('=' expression)?
406//                    (',' identifier ('=' expression)?)* 'in' expression
407static ExprAST *ParseVarExpr() {
408  getNextToken();  // eat the var.
409
410  std::vector<std::pair<std::string, ExprAST*> > VarNames;
411
412  // At least one variable name is required.
413  if (CurTok != tok_identifier)
414    return Error("expected identifier after var");
415
416  while (1) {
417    std::string Name = IdentifierStr;
418    getNextToken();  // eat identifier.
419
420    // Read the optional initializer.
421    ExprAST *Init = 0;
422    if (CurTok == '=') {
423      getNextToken(); // eat the '='.
424
425      Init = ParseExpression();
426      if (Init == 0) return 0;
427    }
428
429    VarNames.push_back(std::make_pair(Name, Init));
430
431    // End of var list, exit loop.
432    if (CurTok != ',') break;
433    getNextToken(); // eat the ','.
434
435    if (CurTok != tok_identifier)
436      return Error("expected identifier list after var");
437  }
438
439  // At this point, we have to have 'in'.
440  if (CurTok != tok_in)
441    return Error("expected 'in' keyword after 'var'");
442  getNextToken();  // eat 'in'.
443
444  ExprAST *Body = ParseExpression();
445  if (Body == 0) return 0;
446
447  return new VarExprAST(VarNames, Body);
448}
449
450/// primary
451///   ::= identifierexpr
452///   ::= numberexpr
453///   ::= parenexpr
454///   ::= ifexpr
455///   ::= forexpr
456///   ::= varexpr
457static ExprAST *ParsePrimary() {
458  switch (CurTok) {
459  default: return Error("unknown token when expecting an expression");
460  case tok_identifier: return ParseIdentifierExpr();
461  case tok_number:     return ParseNumberExpr();
462  case '(':            return ParseParenExpr();
463  case tok_if:         return ParseIfExpr();
464  case tok_for:        return ParseForExpr();
465  case tok_var:        return ParseVarExpr();
466  }
467}
468
469/// unary
470///   ::= primary
471///   ::= '!' unary
472static ExprAST *ParseUnary() {
473  // If the current token is not an operator, it must be a primary expr.
474  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
475    return ParsePrimary();
476
477  // If this is a unary operator, read it.
478  int Opc = CurTok;
479  getNextToken();
480  if (ExprAST *Operand = ParseUnary())
481    return new UnaryExprAST(Opc, Operand);
482  return 0;
483}
484
485/// binoprhs
486///   ::= ('+' unary)*
487static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
488  // If this is a binop, find its precedence.
489  while (1) {
490    int TokPrec = GetTokPrecedence();
491
492    // If this is a binop that binds at least as tightly as the current binop,
493    // consume it, otherwise we are done.
494    if (TokPrec < ExprPrec)
495      return LHS;
496
497    // Okay, we know this is a binop.
498    int BinOp = CurTok;
499    getNextToken();  // eat binop
500
501    // Parse the unary expression after the binary operator.
502    ExprAST *RHS = ParseUnary();
503    if (!RHS) return 0;
504
505    // If BinOp binds less tightly with RHS than the operator after RHS, let
506    // the pending operator take RHS as its LHS.
507    int NextPrec = GetTokPrecedence();
508    if (TokPrec < NextPrec) {
509      RHS = ParseBinOpRHS(TokPrec+1, RHS);
510      if (RHS == 0) return 0;
511    }
512
513    // Merge LHS/RHS.
514    LHS = new BinaryExprAST(BinOp, LHS, RHS);
515  }
516}
517
518/// expression
519///   ::= unary binoprhs
520///
521static ExprAST *ParseExpression() {
522  ExprAST *LHS = ParseUnary();
523  if (!LHS) return 0;
524
525  return ParseBinOpRHS(0, LHS);
526}
527
528/// prototype
529///   ::= id '(' id* ')'
530///   ::= binary LETTER number? (id, id)
531///   ::= unary LETTER (id)
532static PrototypeAST *ParsePrototype() {
533  std::string FnName;
534
535  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
536  unsigned BinaryPrecedence = 30;
537
538  switch (CurTok) {
539  default:
540    return ErrorP("Expected function name in prototype");
541  case tok_identifier:
542    FnName = IdentifierStr;
543    Kind = 0;
544    getNextToken();
545    break;
546  case tok_unary:
547    getNextToken();
548    if (!isascii(CurTok))
549      return ErrorP("Expected unary operator");
550    FnName = "unary";
551    FnName += (char)CurTok;
552    Kind = 1;
553    getNextToken();
554    break;
555  case tok_binary:
556    getNextToken();
557    if (!isascii(CurTok))
558      return ErrorP("Expected binary operator");
559    FnName = "binary";
560    FnName += (char)CurTok;
561    Kind = 2;
562    getNextToken();
563
564    // Read the precedence if present.
565    if (CurTok == tok_number) {
566      if (NumVal < 1 || NumVal > 100)
567        return ErrorP("Invalid precedecnce: must be 1..100");
568      BinaryPrecedence = (unsigned)NumVal;
569      getNextToken();
570    }
571    break;
572  }
573
574  if (CurTok != '(')
575    return ErrorP("Expected '(' in prototype");
576
577  std::vector<std::string> ArgNames;
578  while (getNextToken() == tok_identifier)
579    ArgNames.push_back(IdentifierStr);
580  if (CurTok != ')')
581    return ErrorP("Expected ')' in prototype");
582
583  // success.
584  getNextToken();  // eat ')'.
585
586  // Verify right number of names for operator.
587  if (Kind && ArgNames.size() != Kind)
588    return ErrorP("Invalid number of operands for operator");
589
590  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
591}
592
593/// definition ::= 'def' prototype expression
594static FunctionAST *ParseDefinition() {
595  getNextToken();  // eat def.
596  PrototypeAST *Proto = ParsePrototype();
597  if (Proto == 0) return 0;
598
599  if (ExprAST *E = ParseExpression())
600    return new FunctionAST(Proto, E);
601  return 0;
602}
603
604/// toplevelexpr ::= expression
605static FunctionAST *ParseTopLevelExpr() {
606  if (ExprAST *E = ParseExpression()) {
607    // Make an anonymous proto.
608    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
609    return new FunctionAST(Proto, E);
610  }
611  return 0;
612}
613
614/// external ::= 'extern' prototype
615static PrototypeAST *ParseExtern() {
616  getNextToken();  // eat extern.
617  return ParsePrototype();
618}
619
620//===----------------------------------------------------------------------===//
621// Code Generation
622//===----------------------------------------------------------------------===//
623
624static Module *TheModule;
625static FunctionPassManager *TheFPM;
626static IRBuilder<> Builder(getGlobalContext());
627static std::map<std::string, AllocaInst*> NamedValues;
628
629Value *ErrorV(const char *Str) { Error(Str); return 0; }
630
631/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
632/// the function.  This is used for mutable variables etc.
633static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
634                                          const std::string &VarName) {
635  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
636                 TheFunction->getEntryBlock().begin());
637  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
638                           VarName.c_str());
639}
640
641Value *NumberExprAST::Codegen() {
642  return ConstantFP::get(getGlobalContext(), APFloat(Val));
643}
644
645Value *VariableExprAST::Codegen() {
646  // Look this variable up in the function.
647  Value *V = NamedValues[Name];
648  if (V == 0) return ErrorV("Unknown variable name");
649
650  // Load the value.
651  return Builder.CreateLoad(V, Name.c_str());
652}
653
654Value *UnaryExprAST::Codegen() {
655  Value *OperandV = Operand->Codegen();
656  if (OperandV == 0) return 0;
657#ifdef USE_MCJIT
658  Function *F = TheHelper->getFunction(MakeLegalFunctionName(std::string("unary")+Opcode));
659#else
660  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
661#endif
662  if (F == 0)
663    return ErrorV("Unknown unary operator");
664
665  return Builder.CreateCall(F, OperandV, "unop");
666}
667
668Value *BinaryExprAST::Codegen() {
669  // Special case '=' because we don't want to emit the LHS as an expression.
670  if (Op == '=') {
671    // Assignment requires the LHS to be an identifier.
672    // For now, I'm building without RTTI because LLVM builds that way by
673    // default and so we need to build that way to use the command line supprt.
674    // If you build LLVM with RTTI this can be changed back to a dynamic_cast.
675    VariableExprAST *LHSE = reinterpret_cast<VariableExprAST*>(LHS);
676    if (!LHSE)
677      return ErrorV("destination of '=' must be a variable");
678    // Codegen the RHS.
679    Value *Val = RHS->Codegen();
680    if (Val == 0) return 0;
681
682    // Look up the name.
683    Value *Variable = NamedValues[LHSE->getName()];
684    if (Variable == 0) return ErrorV("Unknown variable name");
685
686    Builder.CreateStore(Val, Variable);
687    return Val;
688  }
689
690  Value *L = LHS->Codegen();
691  Value *R = RHS->Codegen();
692  if (L == 0 || R == 0) return 0;
693
694  switch (Op) {
695  case '+': return Builder.CreateFAdd(L, R, "addtmp");
696  case '-': return Builder.CreateFSub(L, R, "subtmp");
697  case '*': return Builder.CreateFMul(L, R, "multmp");
698  case '/': return Builder.CreateFDiv(L, R, "divtmp");
699  case '<':
700    L = Builder.CreateFCmpULT(L, R, "cmptmp");
701    // Convert bool 0/1 to double 0.0 or 1.0
702    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
703                                "booltmp");
704  default: break;
705  }
706
707  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
708  // a call to it.
709  Function *F = TheModule->getFunction(std::string("binary")+Op);
710  assert(F && "binary operator not found!");
711
712  Value *Ops[] = { L, R };
713  return Builder.CreateCall(F, Ops, "binop");
714}
715
716Value *CallExprAST::Codegen() {
717  // Look up the name in the global module table.
718  Function *CalleeF = TheModule->getFunction(Callee);
719  if (CalleeF == 0) {
720    char error_str[64];
721    sprintf(error_str, "Unknown function referenced %s", Callee.c_str());
722    return ErrorV(error_str);
723  }
724
725  // If argument mismatch error.
726  if (CalleeF->arg_size() != Args.size())
727    return ErrorV("Incorrect # arguments passed");
728
729  std::vector<Value*> ArgsV;
730  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
731    ArgsV.push_back(Args[i]->Codegen());
732    if (ArgsV.back() == 0) return 0;
733  }
734
735  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
736}
737
738Value *IfExprAST::Codegen() {
739  Value *CondV = Cond->Codegen();
740  if (CondV == 0) return 0;
741
742  // Convert condition to a bool by comparing equal to 0.0.
743  CondV = Builder.CreateFCmpONE(CondV,
744                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
745                                "ifcond");
746
747  Function *TheFunction = Builder.GetInsertBlock()->getParent();
748
749  // Create blocks for the then and else cases.  Insert the 'then' block at the
750  // end of the function.
751  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
752  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
753  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
754
755  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
756
757  // Emit then value.
758  Builder.SetInsertPoint(ThenBB);
759
760  Value *ThenV = Then->Codegen();
761  if (ThenV == 0) return 0;
762
763  Builder.CreateBr(MergeBB);
764  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
765  ThenBB = Builder.GetInsertBlock();
766
767  // Emit else block.
768  TheFunction->getBasicBlockList().push_back(ElseBB);
769  Builder.SetInsertPoint(ElseBB);
770
771  Value *ElseV = Else->Codegen();
772  if (ElseV == 0) return 0;
773
774  Builder.CreateBr(MergeBB);
775  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
776  ElseBB = Builder.GetInsertBlock();
777
778  // Emit merge block.
779  TheFunction->getBasicBlockList().push_back(MergeBB);
780  Builder.SetInsertPoint(MergeBB);
781  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
782                                  "iftmp");
783
784  PN->addIncoming(ThenV, ThenBB);
785  PN->addIncoming(ElseV, ElseBB);
786  return PN;
787}
788
789Value *ForExprAST::Codegen() {
790  // Output this as:
791  //   var = alloca double
792  //   ...
793  //   start = startexpr
794  //   store start -> var
795  //   goto loop
796  // loop:
797  //   ...
798  //   bodyexpr
799  //   ...
800  // loopend:
801  //   step = stepexpr
802  //   endcond = endexpr
803  //
804  //   curvar = load var
805  //   nextvar = curvar + step
806  //   store nextvar -> var
807  //   br endcond, loop, endloop
808  // outloop:
809
810  Function *TheFunction = Builder.GetInsertBlock()->getParent();
811
812  // Create an alloca for the variable in the entry block.
813  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
814
815  // Emit the start code first, without 'variable' in scope.
816  Value *StartVal = Start->Codegen();
817  if (StartVal == 0) return 0;
818
819  // Store the value into the alloca.
820  Builder.CreateStore(StartVal, Alloca);
821
822  // Make the new basic block for the loop header, inserting after current
823  // block.
824  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
825
826  // Insert an explicit fall through from the current block to the LoopBB.
827  Builder.CreateBr(LoopBB);
828
829  // Start insertion in LoopBB.
830  Builder.SetInsertPoint(LoopBB);
831
832  // Within the loop, the variable is defined equal to the PHI node.  If it
833  // shadows an existing variable, we have to restore it, so save it now.
834  AllocaInst *OldVal = NamedValues[VarName];
835  NamedValues[VarName] = Alloca;
836
837  // Emit the body of the loop.  This, like any other expr, can change the
838  // current BB.  Note that we ignore the value computed by the body, but don't
839  // allow an error.
840  if (Body->Codegen() == 0)
841    return 0;
842
843  // Emit the step value.
844  Value *StepVal;
845  if (Step) {
846    StepVal = Step->Codegen();
847    if (StepVal == 0) return 0;
848  } else {
849    // If not specified, use 1.0.
850    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
851  }
852
853  // Compute the end condition.
854  Value *EndCond = End->Codegen();
855  if (EndCond == 0) return EndCond;
856
857  // Reload, increment, and restore the alloca.  This handles the case where
858  // the body of the loop mutates the variable.
859  Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
860  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
861  Builder.CreateStore(NextVar, Alloca);
862
863  // Convert condition to a bool by comparing equal to 0.0.
864  EndCond = Builder.CreateFCmpONE(EndCond,
865                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
866                                  "loopcond");
867
868  // Create the "after loop" block and insert it.
869  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
870
871  // Insert the conditional branch into the end of LoopEndBB.
872  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
873
874  // Any new code will be inserted in AfterBB.
875  Builder.SetInsertPoint(AfterBB);
876
877  // Restore the unshadowed variable.
878  if (OldVal)
879    NamedValues[VarName] = OldVal;
880  else
881    NamedValues.erase(VarName);
882
883
884  // for expr always returns 0.0.
885  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
886}
887
888Value *VarExprAST::Codegen() {
889  std::vector<AllocaInst *> OldBindings;
890
891  Function *TheFunction = Builder.GetInsertBlock()->getParent();
892
893  // Register all variables and emit their initializer.
894  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
895    const std::string &VarName = VarNames[i].first;
896    ExprAST *Init = VarNames[i].second;
897
898    // Emit the initializer before adding the variable to scope, this prevents
899    // the initializer from referencing the variable itself, and permits stuff
900    // like this:
901    //  var a = 1 in
902    //    var a = a in ...   # refers to outer 'a'.
903    Value *InitVal;
904    if (Init) {
905      InitVal = Init->Codegen();
906      if (InitVal == 0) return 0;
907    } else { // If not specified, use 0.0.
908      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
909    }
910
911    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
912    Builder.CreateStore(InitVal, Alloca);
913
914    // Remember the old variable binding so that we can restore the binding when
915    // we unrecurse.
916    OldBindings.push_back(NamedValues[VarName]);
917
918    // Remember this binding.
919    NamedValues[VarName] = Alloca;
920  }
921
922  // Codegen the body, now that all vars are in scope.
923  Value *BodyVal = Body->Codegen();
924  if (BodyVal == 0) return 0;
925
926  // Pop all our variables from scope.
927  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
928    NamedValues[VarNames[i].first] = OldBindings[i];
929
930  // Return the body computation.
931  return BodyVal;
932}
933
934Function *PrototypeAST::Codegen() {
935  // Make the function type:  double(double,double) etc.
936  std::vector<Type*> Doubles(Args.size(),
937                             Type::getDoubleTy(getGlobalContext()));
938  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
939                                       Doubles, false);
940
941  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
942  // If F conflicted, there was already something named 'Name'.  If it has a
943  // body, don't allow redefinition or reextern.
944  if (F->getName() != Name) {
945    // Delete the one we just made and get the existing one.
946    F->eraseFromParent();
947    F = TheModule->getFunction(Name);
948    // If F already has a body, reject this.
949    if (!F->empty()) {
950      ErrorF("redefinition of function");
951      return 0;
952    }
953    // If F took a different number of args, reject.
954    if (F->arg_size() != Args.size()) {
955      ErrorF("redefinition of function with different # args");
956      return 0;
957    }
958  }
959
960  // Set names for all arguments.
961  unsigned Idx = 0;
962  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
963       ++AI, ++Idx)
964    AI->setName(Args[Idx]);
965
966  return F;
967}
968
969/// CreateArgumentAllocas - Create an alloca for each argument and register the
970/// argument in the symbol table so that references to it will succeed.
971void PrototypeAST::CreateArgumentAllocas(Function *F) {
972  Function::arg_iterator AI = F->arg_begin();
973  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
974    // Create an alloca for this variable.
975    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
976
977    // Store the initial value into the alloca.
978    Builder.CreateStore(AI, Alloca);
979
980    // Add arguments to variable symbol table.
981    NamedValues[Args[Idx]] = Alloca;
982  }
983}
984
985Function *FunctionAST::Codegen() {
986  NamedValues.clear();
987
988  Function *TheFunction = Proto->Codegen();
989  if (TheFunction == 0)
990    return 0;
991
992  // If this is an operator, install it.
993  if (Proto->isBinaryOp())
994    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
995
996  // Create a new basic block to start insertion into.
997  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
998  Builder.SetInsertPoint(BB);
999
1000  // Add all arguments to the symbol table and create their allocas.
1001  Proto->CreateArgumentAllocas(TheFunction);
1002
1003  if (Value *RetVal = Body->Codegen()) {
1004    // Finish off the function.
1005    Builder.CreateRet(RetVal);
1006
1007    // Validate the generated code, checking for consistency.
1008    verifyFunction(*TheFunction);
1009
1010    // Optimize the function.
1011    TheFPM->run(*TheFunction);
1012
1013    return TheFunction;
1014  }
1015
1016  // Error reading body, remove function.
1017  TheFunction->eraseFromParent();
1018
1019  if (Proto->isBinaryOp())
1020    BinopPrecedence.erase(Proto->getOperatorName());
1021  return 0;
1022}
1023
1024//===----------------------------------------------------------------------===//
1025// Top-Level parsing and JIT Driver
1026//===----------------------------------------------------------------------===//
1027
/// TheExecutionEngine - The JIT engine.  Assigned in main() and used by
/// HandleTopLevelExpression() to obtain a callable pointer for generated code.
static ExecutionEngine *TheExecutionEngine;
1029
1030static void HandleDefinition() {
1031  if (FunctionAST *F = ParseDefinition()) {
1032    if (Function *LF = F->Codegen()) {
1033#ifndef MINIMAL_STDERR_OUTPUT
1034      fprintf(stderr, "Read function definition:");
1035      LF->dump();
1036#endif
1037    }
1038  } else {
1039    // Skip token for error recovery.
1040    getNextToken();
1041  }
1042}
1043
1044static void HandleExtern() {
1045  if (PrototypeAST *P = ParseExtern()) {
1046    if (Function *F = P->Codegen()) {
1047#ifndef MINIMAL_STDERR_OUTPUT
1048      fprintf(stderr, "Read extern: ");
1049      F->dump();
1050#endif
1051    }
1052  } else {
1053    // Skip token for error recovery.
1054    getNextToken();
1055  }
1056}
1057
1058static void HandleTopLevelExpression() {
1059  // Evaluate a top-level expression into an anonymous function.
1060  if (FunctionAST *F = ParseTopLevelExpr()) {
1061    if (Function *LF = F->Codegen()) {
1062      // JIT the function, returning a function pointer.
1063      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
1064      // Cast it to the right type (takes no arguments, returns a double) so we
1065      // can call it as a native function.
1066      double (*FP)() = (double (*)())(intptr_t)FPtr;
1067#ifdef MINIMAL_STDERR_OUTPUT
1068      FP();
1069#else
1070      fprintf(stderr, "Evaluated to %f\n", FP());
1071#endif
1072    }
1073  } else {
1074    // Skip token for error recovery.
1075    getNextToken();
1076  }
1077}
1078
1079/// top ::= definition | external | expression | ';'
1080static void MainLoop() {
1081  while (1) {
1082#ifndef MINIMAL_STDERR_OUTPUT
1083    fprintf(stderr, "ready> ");
1084#endif
1085    switch (CurTok) {
1086    case tok_eof:    return;
1087    case ';':        getNextToken(); break;  // ignore top-level semicolons.
1088    case tok_def:    HandleDefinition(); break;
1089    case tok_extern: HandleExtern(); break;
1090    default:         HandleTopLevelExpression(); break;
1091    }
1092  }
1093}
1094
1095//===----------------------------------------------------------------------===//
1096// "Library" functions that can be "extern'd" from user code.
1097//===----------------------------------------------------------------------===//
1098
/// putchard - Convert X to a char, write it with putchar, and return 0.
extern "C"
double putchard(double X) {
  const char Ch = static_cast<char>(X);
  putchar(Ch);
  return 0.0;
}
1105
/// printd - Print X to stdout using the "%f" format (no trailing newline;
/// see printlf for that) and return 0.
extern "C"
double printd(double X) {
  fprintf(stdout, "%f", X);
  return 0.0;
}
1112
/// printlf - Write a single newline to stdout and return 0.
extern "C"
double printlf() {
  fputs("\n", stdout);
  return 0.0;
}
1118
1119//===----------------------------------------------------------------------===//
1120// Command line input file handlers
1121//===----------------------------------------------------------------------===//
1122
1123Module* parseInputIR(std::string InputFile) {
1124  SMDiagnostic Err;
1125  Module *M = ParseIRFile(InputFile, Err, getGlobalContext());
1126  if (!M) {
1127    Err.print("IR parsing failed: ", errs());
1128    return NULL;
1129  }
1130
1131  return M;
1132}
1133
1134//===----------------------------------------------------------------------===//
1135// Main driver code.
1136//===----------------------------------------------------------------------===//
1137
1138int main(int argc, char **argv) {
1139  InitializeNativeTarget();
1140  LLVMContext &Context = getGlobalContext();
1141
1142  cl::ParseCommandLineOptions(argc, argv,
1143                              "Kaleidoscope example program\n");
1144
1145  // Install standard binary operators.
1146  // 1 is lowest precedence.
1147  BinopPrecedence['='] = 2;
1148  BinopPrecedence['<'] = 10;
1149  BinopPrecedence['+'] = 20;
1150  BinopPrecedence['-'] = 20;
1151  BinopPrecedence['/'] = 40;
1152  BinopPrecedence['*'] = 40;  // highest.
1153
1154  // Make the module, which holds all the code.
1155  if (!InputIR.empty()) {
1156    TheModule = parseInputIR(InputIR);
1157  } else {
1158    TheModule = new Module("my cool jit", Context);
1159  }
1160
1161  // Create the JIT.  This takes ownership of the module.
1162  std::string ErrStr;
1163  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
1164  if (!TheExecutionEngine) {
1165    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
1166    exit(1);
1167  }
1168
1169  FunctionPassManager OurFPM(TheModule);
1170
1171  // Set up the optimizer pipeline.  Start with registering info about how the
1172  // target lays out data structures.
1173  OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
1174  // Provide basic AliasAnalysis support for GVN.
1175  OurFPM.add(createBasicAliasAnalysisPass());
1176  // Promote allocas to registers.
1177  OurFPM.add(createPromoteMemoryToRegisterPass());
1178  // Do simple "peephole" optimizations and bit-twiddling optzns.
1179  OurFPM.add(createInstructionCombiningPass());
1180  // Reassociate expressions.
1181  OurFPM.add(createReassociatePass());
1182  // Eliminate Common SubExpressions.
1183  OurFPM.add(createGVNPass());
1184  // Simplify the control flow graph (deleting unreachable blocks, etc).
1185  OurFPM.add(createCFGSimplificationPass());
1186
1187  OurFPM.doInitialization();
1188
1189  // Set the global so the code gen can use this.
1190  TheFPM = &OurFPM;
1191
1192  // Prime the first token.
1193#ifndef MINIMAL_STDERR_OUTPUT
1194  fprintf(stderr, "ready> ");
1195#endif
1196  getNextToken();
1197
1198  // Run the main "interpreter loop" now.
1199  MainLoop();
1200
1201  // Print out all of the generated code.
1202  TheFPM = 0;
1203#if !defined(MINIMAL_STDERR_OUTPUT) || defined(DUMP_FINAL_MODULE)
1204  TheModule->dump();
1205#endif
1206  return 0;
1207}
1208