1#include <cstdio> 2#include <cstdlib> 3#include <string> 4#include <map> 5#include <vector> 6 7//===----------------------------------------------------------------------===// 8// Lexer 9//===----------------------------------------------------------------------===// 10 11// The lexer returns tokens [0-255] if it is an unknown character, otherwise one 12// of these for known things. 13enum Token { 14 tok_eof = -1, 15 16 // commands 17 tok_def = -2, tok_extern = -3, 18 19 // primary 20 tok_identifier = -4, tok_number = -5 21}; 22 23static std::string IdentifierStr; // Filled in if tok_identifier 24static double NumVal; // Filled in if tok_number 25 26/// gettok - Return the next token from standard input. 27static int gettok() { 28 static int LastChar = ' '; 29 30 // Skip any whitespace. 31 while (isspace(LastChar)) 32 LastChar = getchar(); 33 34 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 35 IdentifierStr = LastChar; 36 while (isalnum((LastChar = getchar()))) 37 IdentifierStr += LastChar; 38 39 if (IdentifierStr == "def") return tok_def; 40 if (IdentifierStr == "extern") return tok_extern; 41 return tok_identifier; 42 } 43 44 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 45 std::string NumStr; 46 do { 47 NumStr += LastChar; 48 LastChar = getchar(); 49 } while (isdigit(LastChar) || LastChar == '.'); 50 51 NumVal = strtod(NumStr.c_str(), 0); 52 return tok_number; 53 } 54 55 if (LastChar == '#') { 56 // Comment until end of line. 57 do LastChar = getchar(); 58 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 59 60 if (LastChar != EOF) 61 return gettok(); 62 } 63 64 // Check for end of file. Don't eat the EOF. 65 if (LastChar == EOF) 66 return tok_eof; 67 68 // Otherwise, just return the character as its ascii value. 69 int ThisChar = LastChar; 70 LastChar = getchar(); 71 return ThisChar; 72} 73 74//===----------------------------------------------------------------------===// 75// Abstract Syntax Tree (aka Parse Tree) 76//===----------------------------------------------------------------------===// 77 78/// ExprAST - Base class for all expression nodes. 79class ExprAST { 80public: 81 virtual ~ExprAST() {} 82}; 83 84/// NumberExprAST - Expression class for numeric literals like "1.0". 85class NumberExprAST : public ExprAST { 86 double Val; 87public: 88 NumberExprAST(double val) : Val(val) {} 89}; 90 91/// VariableExprAST - Expression class for referencing a variable, like "a". 92class VariableExprAST : public ExprAST { 93 std::string Name; 94public: 95 VariableExprAST(const std::string &name) : Name(name) {} 96}; 97 98/// BinaryExprAST - Expression class for a binary operator. 99class BinaryExprAST : public ExprAST { 100 char Op; 101 ExprAST *LHS, *RHS; 102public: 103 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 104 : Op(op), LHS(lhs), RHS(rhs) {} 105}; 106 107/// CallExprAST - Expression class for function calls. 108class CallExprAST : public ExprAST { 109 std::string Callee; 110 std::vector<ExprAST*> Args; 111public: 112 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) 113 : Callee(callee), Args(args) {} 114}; 115 116/// PrototypeAST - This class represents the "prototype" for a function, 117/// which captures its name, and its argument names (thus implicitly the number 118/// of arguments the function takes). 119class PrototypeAST { 120 std::string Name; 121 std::vector<std::string> Args; 122public: 123 PrototypeAST(const std::string &name, const std::vector<std::string> &args) 124 : Name(name), Args(args) {} 125 126}; 127 128/// FunctionAST - This class represents a function definition itself. 129class FunctionAST { 130 PrototypeAST *Proto; 131 ExprAST *Body; 132public: 133 FunctionAST(PrototypeAST *proto, ExprAST *body) 134 : Proto(proto), Body(body) {} 135 136}; 137 138//===----------------------------------------------------------------------===// 139// Parser 140//===----------------------------------------------------------------------===// 141 142/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 143/// token the parser is looking at. getNextToken reads another token from the 144/// lexer and updates CurTok with its results. 145static int CurTok; 146static int getNextToken() { 147 return CurTok = gettok(); 148} 149 150/// BinopPrecedence - This holds the precedence for each binary operator that is 151/// defined. 152static std::map<char, int> BinopPrecedence; 153 154/// GetTokPrecedence - Get the precedence of the pending binary operator token. 155static int GetTokPrecedence() { 156 if (!isascii(CurTok)) 157 return -1; 158 159 // Make sure it's a declared binop. 160 int TokPrec = BinopPrecedence[CurTok]; 161 if (TokPrec <= 0) return -1; 162 return TokPrec; 163} 164 165/// Error* - These are little helper functions for error handling. 166ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} 167PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } 168FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } 169 170static ExprAST *ParseExpression(); 171 172/// identifierexpr 173/// ::= identifier 174/// ::= identifier '(' expression* ')' 175static ExprAST *ParseIdentifierExpr() { 176 std::string IdName = IdentifierStr; 177 178 getNextToken(); // eat identifier. 179 180 if (CurTok != '(') // Simple variable ref. 181 return new VariableExprAST(IdName); 182 183 // Call. 184 getNextToken(); // eat ( 185 std::vector<ExprAST*> Args; 186 if (CurTok != ')') { 187 while (1) { 188 ExprAST *Arg = ParseExpression(); 189 if (!Arg) return 0; 190 Args.push_back(Arg); 191 192 if (CurTok == ')') break; 193 194 if (CurTok != ',') 195 return Error("Expected ')' or ',' in argument list"); 196 getNextToken(); 197 } 198 } 199 200 // Eat the ')'. 201 getNextToken(); 202 203 return new CallExprAST(IdName, Args); 204} 205 206/// numberexpr ::= number 207static ExprAST *ParseNumberExpr() { 208 ExprAST *Result = new NumberExprAST(NumVal); 209 getNextToken(); // consume the number 210 return Result; 211} 212 213/// parenexpr ::= '(' expression ')' 214static ExprAST *ParseParenExpr() { 215 getNextToken(); // eat (. 216 ExprAST *V = ParseExpression(); 217 if (!V) return 0; 218 219 if (CurTok != ')') 220 return Error("expected ')'"); 221 getNextToken(); // eat ). 222 return V; 223} 224 225/// primary 226/// ::= identifierexpr 227/// ::= numberexpr 228/// ::= parenexpr 229static ExprAST *ParsePrimary() { 230 switch (CurTok) { 231 default: return Error("unknown token when expecting an expression"); 232 case tok_identifier: return ParseIdentifierExpr(); 233 case tok_number: return ParseNumberExpr(); 234 case '(': return ParseParenExpr(); 235 } 236} 237 238/// binoprhs 239/// ::= ('+' primary)* 240static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { 241 // If this is a binop, find its precedence. 242 while (1) { 243 int TokPrec = GetTokPrecedence(); 244 245 // If this is a binop that binds at least as tightly as the current binop, 246 // consume it, otherwise we are done. 247 if (TokPrec < ExprPrec) 248 return LHS; 249 250 // Okay, we know this is a binop. 251 int BinOp = CurTok; 252 getNextToken(); // eat binop 253 254 // Parse the primary expression after the binary operator. 255 ExprAST *RHS = ParsePrimary(); 256 if (!RHS) return 0; 257 258 // If BinOp binds less tightly with RHS than the operator after RHS, let 259 // the pending operator take RHS as its LHS. 260 int NextPrec = GetTokPrecedence(); 261 if (TokPrec < NextPrec) { 262 RHS = ParseBinOpRHS(TokPrec+1, RHS); 263 if (RHS == 0) return 0; 264 } 265 266 // Merge LHS/RHS. 267 LHS = new BinaryExprAST(BinOp, LHS, RHS); 268 } 269} 270 271/// expression 272/// ::= primary binoprhs 273/// 274static ExprAST *ParseExpression() { 275 ExprAST *LHS = ParsePrimary(); 276 if (!LHS) return 0; 277 278 return ParseBinOpRHS(0, LHS); 279} 280 281/// prototype 282/// ::= id '(' id* ')' 283static PrototypeAST *ParsePrototype() { 284 if (CurTok != tok_identifier) 285 return ErrorP("Expected function name in prototype"); 286 287 std::string FnName = IdentifierStr; 288 getNextToken(); 289 290 if (CurTok != '(') 291 return ErrorP("Expected '(' in prototype"); 292 293 std::vector<std::string> ArgNames; 294 while (getNextToken() == tok_identifier) 295 ArgNames.push_back(IdentifierStr); 296 if (CurTok != ')') 297 return ErrorP("Expected ')' in prototype"); 298 299 // success. 300 getNextToken(); // eat ')'. 301 302 return new PrototypeAST(FnName, ArgNames); 303} 304 305/// definition ::= 'def' prototype expression 306static FunctionAST *ParseDefinition() { 307 getNextToken(); // eat def. 308 PrototypeAST *Proto = ParsePrototype(); 309 if (Proto == 0) return 0; 310 311 if (ExprAST *E = ParseExpression()) 312 return new FunctionAST(Proto, E); 313 return 0; 314} 315 316/// toplevelexpr ::= expression 317static FunctionAST *ParseTopLevelExpr() { 318 if (ExprAST *E = ParseExpression()) { 319 // Make an anonymous proto. 320 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); 321 return new FunctionAST(Proto, E); 322 } 323 return 0; 324} 325 326/// external ::= 'extern' prototype 327static PrototypeAST *ParseExtern() { 328 getNextToken(); // eat extern. 329 return ParsePrototype(); 330} 331 332//===----------------------------------------------------------------------===// 333// Top-Level parsing 334//===----------------------------------------------------------------------===// 335 336static void HandleDefinition() { 337 if (ParseDefinition()) { 338 fprintf(stderr, "Parsed a function definition.\n"); 339 } else { 340 // Skip token for error recovery. 341 getNextToken(); 342 } 343} 344 345static void HandleExtern() { 346 if (ParseExtern()) { 347 fprintf(stderr, "Parsed an extern\n"); 348 } else { 349 // Skip token for error recovery. 350 getNextToken(); 351 } 352} 353 354static void HandleTopLevelExpression() { 355 // Evaluate a top-level expression into an anonymous function. 356 if (ParseTopLevelExpr()) { 357 fprintf(stderr, "Parsed a top-level expr\n"); 358 } else { 359 // Skip token for error recovery. 360 getNextToken(); 361 } 362} 363 364/// top ::= definition | external | expression | ';' 365static void MainLoop() { 366 while (1) { 367 fprintf(stderr, "ready> "); 368 switch (CurTok) { 369 case tok_eof: return; 370 case ';': getNextToken(); break; // ignore top-level semicolons. 371 case tok_def: HandleDefinition(); break; 372 case tok_extern: HandleExtern(); break; 373 default: HandleTopLevelExpression(); break; 374 } 375 } 376} 377 378//===----------------------------------------------------------------------===// 379// Main driver code. 380//===----------------------------------------------------------------------===// 381 382int main() { 383 // Install standard binary operators. 384 // 1 is lowest precedence. 385 BinopPrecedence['<'] = 10; 386 BinopPrecedence['+'] = 20; 387 BinopPrecedence['-'] = 20; 388 BinopPrecedence['*'] = 40; // highest. 389 390 // Prime the first token. 391 fprintf(stderr, "ready> "); 392 getNextToken(); 393 394 // Run the main "interpreter loop" now. 395 MainLoop(); 396 397 return 0; 398} 399