1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "tools/gn/parser.h"
6
7#include "base/logging.h"
8#include "tools/gn/functions.h"
9#include "tools/gn/operators.h"
10#include "tools/gn/token.h"
11
12// grammar:
13//
14// file       := (statement)*
15// statement  := block | if | assignment
16// block      := '{' statement* '}'
17// if         := 'if' '(' expr ')' statement [ else ]
18// else       := 'else' (if | statement)*
19// assignment := ident {'=' | '+=' | '-='} expr
20
21enum Precedence {
22  PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
23  PRECEDENCE_OR = 2,
24  PRECEDENCE_AND = 3,
25  PRECEDENCE_EQUALITY = 4,
26  PRECEDENCE_RELATION = 5,
27  PRECEDENCE_SUM = 6,
28  PRECEDENCE_PREFIX = 7,
29  PRECEDENCE_CALL = 8,
30  PRECEDENCE_DOT = 9,         // Highest precedence.
31};
32
33// The top-level for blocks/ifs is recursive descent, the expression parser is
34// a Pratt parser. The basic idea there is to have the precedences (and
35// associativities) encoded relative to each other and only parse up until you
36// hit something of that precedence. There's a dispatch table in expressions_
37// at the top of parser.cc that describes how each token dispatches if it's
38// seen as either a prefix or infix operator, and if it's infix, what its
39// precedence is.
40//
41// Refs:
42// - http://javascript.crockford.com/tdop/tdop.html
43// - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
44
45// Indexed by Token::Type.
46ParserHelper Parser::expressions_[] = {
47  {NULL, NULL, -1},                                             // INVALID
48  {&Parser::Literal, NULL, -1},                                 // INTEGER
49  {&Parser::Literal, NULL, -1},                                 // STRING
50  {&Parser::Literal, NULL, -1},                                 // TRUE_TOKEN
51  {&Parser::Literal, NULL, -1},                                 // FALSE_TOKEN
52  {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // EQUAL
53  {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // PLUS
54  {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // MINUS
55  {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // PLUS_EQUALS
56  {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // MINUS_EQUALS
57  {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // EQUAL_EQUAL
58  {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // NOT_EQUAL
59  {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_EQUAL
60  {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_EQUAL
61  {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_THAN
62  {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_THAN
63  {NULL, &Parser::BinaryOperator, PRECEDENCE_AND},              // BOOLEAN_AND
64  {NULL, &Parser::BinaryOperator, PRECEDENCE_OR},               // BOOLEAN_OR
65  {&Parser::Not, NULL, -1},                                     // BANG
66  {NULL, &Parser::DotOperator, PRECEDENCE_DOT},                 // DOT
67  {&Parser::Group, NULL, -1},                                   // LEFT_PAREN
68  {NULL, NULL, -1},                                             // RIGHT_PAREN
69  {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},         // LEFT_BRACKET
70  {NULL, NULL, -1},                                             // RIGHT_BRACKET
71  {NULL, NULL, -1},                                             // LEFT_BRACE
72  {NULL, NULL, -1},                                             // RIGHT_BRACE
73  {NULL, NULL, -1},                                             // IF
74  {NULL, NULL, -1},                                             // ELSE
75  {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
76  {NULL, NULL, -1},                                             // COMMA
77  {NULL, NULL, -1},                                             // COMMENT
78};
79
80Parser::Parser(const std::vector<Token>& tokens, Err* err)
81    : tokens_(tokens), err_(err), cur_(0) {
82}
83
84Parser::~Parser() {
85}
86
87// static
88scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
89                                    Err* err) {
90  Parser p(tokens, err);
91  return p.ParseFile().PassAs<ParseNode>();
92}
93
94// static
95scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
96                                              Err* err) {
97  Parser p(tokens, err);
98  return p.ParseExpression().Pass();
99}
100
101bool Parser::IsAssignment(const ParseNode* node) const {
102  return node && node->AsBinaryOp() &&
103         (node->AsBinaryOp()->op().type() == Token::EQUAL ||
104          node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
105          node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
106}
107
108bool Parser::IsStatementBreak(Token::Type token_type) const {
109  switch (token_type) {
110    case Token::IDENTIFIER:
111    case Token::LEFT_BRACE:
112    case Token::RIGHT_BRACE:
113    case Token::IF:
114    case Token::ELSE:
115      return true;
116    default:
117      return false;
118  }
119}
120
121bool Parser::LookAhead(Token::Type type) {
122  if (at_end())
123    return false;
124  return cur_token().type() == type;
125}
126
127bool Parser::Match(Token::Type type) {
128  if (!LookAhead(type))
129    return false;
130  Consume();
131  return true;
132}
133
134Token Parser::Consume(Token::Type type, const char* error_message) {
135  Token::Type types[1] = { type };
136  return Consume(types, 1, error_message);
137}
138
139Token Parser::Consume(Token::Type* types,
140                      size_t num_types,
141                      const char* error_message) {
142  if (has_error()) {
143    // Don't overwrite current error, but make progress through tokens so that
144    // a loop that's expecting a particular token will still terminate.
145    cur_++;
146    return Token(Location(), Token::INVALID, base::StringPiece());
147  }
148  if (at_end()) {
149    const char kEOFMsg[] = "I hit EOF instead.";
150    if (tokens_.empty())
151      *err_ = Err(Location(), error_message, kEOFMsg);
152    else
153      *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
154    return Token(Location(), Token::INVALID, base::StringPiece());
155  }
156
157  for (size_t i = 0; i < num_types; ++i) {
158    if (cur_token().type() == types[i])
159      return tokens_[cur_++];
160  }
161  *err_ = Err(cur_token(), error_message);
162  return Token(Location(), Token::INVALID, base::StringPiece());
163}
164
165Token Parser::Consume() {
166  return tokens_[cur_++];
167}
168
169scoped_ptr<ParseNode> Parser::ParseExpression() {
170  return ParseExpression(0);
171}
172
173scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
174  if (at_end())
175    return scoped_ptr<ParseNode>();
176
177  Token token = Consume();
178  PrefixFunc prefix = expressions_[token.type()].prefix;
179
180  if (prefix == NULL) {
181    *err_ = Err(token,
182                std::string("Unexpected token '") + token.value().as_string() +
183                    std::string("'"));
184    return scoped_ptr<ParseNode>();
185  }
186
187  scoped_ptr<ParseNode> left = (this->*prefix)(token);
188  if (has_error())
189    return left.Pass();
190
191  while (!at_end() && !IsStatementBreak(cur_token().type()) &&
192         precedence <= expressions_[cur_token().type()].precedence) {
193    token = Consume();
194    InfixFunc infix = expressions_[token.type()].infix;
195    if (infix == NULL) {
196      *err_ = Err(token,
197                  std::string("Unexpected token '") +
198                      token.value().as_string() + std::string("'"));
199      return scoped_ptr<ParseNode>();
200    }
201    left = (this->*infix)(left.Pass(), token);
202    if (has_error())
203      return scoped_ptr<ParseNode>();
204  }
205
206  return left.Pass();
207}
208
209scoped_ptr<ParseNode> Parser::Literal(Token token) {
210  return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass();
211}
212
213scoped_ptr<ParseNode> Parser::Name(Token token) {
214  return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
215}
216
217scoped_ptr<ParseNode> Parser::Group(Token token) {
218  scoped_ptr<ParseNode> expr = ParseExpression();
219  if (has_error())
220    return scoped_ptr<ParseNode>();
221  Consume(Token::RIGHT_PAREN, "Expected ')'");
222  return expr.Pass();
223}
224
225scoped_ptr<ParseNode> Parser::Not(Token token) {
226  scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
227  if (has_error())
228    return scoped_ptr<ParseNode>();
229  scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
230  unary_op->set_op(token);
231  unary_op->set_operand(expr.Pass());
232  return unary_op.PassAs<ParseNode>();
233}
234
235scoped_ptr<ParseNode> Parser::List(Token node) {
236  scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true));
237  if (!has_error() && !at_end())
238    Consume(Token::RIGHT_BRACKET, "Expected ']'");
239  return list.Pass();
240}
241
242scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
243                                             Token token) {
244  scoped_ptr<ParseNode> right =
245      ParseExpression(expressions_[token.type()].precedence + 1);
246  if (!right) {
247    *err_ =
248        Err(token,
249            "Expected right hand side for '" + token.value().as_string() + "'");
250    return scoped_ptr<ParseNode>();
251  }
252  scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
253  binary_op->set_op(token);
254  binary_op->set_left(left.Pass());
255  binary_op->set_right(right.Pass());
256  return binary_op.PassAs<ParseNode>();
257}
258
259scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
260                                               Token token) {
261  scoped_ptr<ListNode> list(new ListNode);
262  list->set_begin_token(token);
263  list->set_end_token(token);
264  scoped_ptr<BlockNode> block;
265  bool has_arg = false;
266  if (Match(Token::LEFT_PAREN)) {
267    // Parsing a function call.
268    has_arg = true;
269    if (Match(Token::RIGHT_PAREN)) {
270      // Nothing, just an empty call.
271    } else {
272      list = ParseList(Token::RIGHT_PAREN, false);
273      if (has_error())
274        return scoped_ptr<ParseNode>();
275      Consume(Token::RIGHT_PAREN, "Expected ')' after call");
276    }
277    // Optionally with a scope.
278    if (LookAhead(Token::LEFT_BRACE)) {
279      block = ParseBlock();
280      if (has_error())
281        return scoped_ptr<ParseNode>();
282    }
283  }
284
285  if (!left && !has_arg) {
286    // Not a function call, just a standalone identifier.
287    return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
288  }
289  scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
290  func_call->set_function(token);
291  func_call->set_args(list.Pass());
292  if (block)
293    func_call->set_block(block.Pass());
294  return func_call.PassAs<ParseNode>();
295}
296
297scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
298                                         Token token) {
299  if (left->AsIdentifier() == NULL) {
300    *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
301    return scoped_ptr<ParseNode>();
302  }
303  scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
304  scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
305  assign->set_op(token);
306  assign->set_left(left.Pass());
307  assign->set_right(value.Pass());
308  return assign.PassAs<ParseNode>();
309}
310
311scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
312                                        Token token) {
313  // TODO: Maybe support more complex expressions like a[0][0]. This would
314  // require work on the evaluator too.
315  if (left->AsIdentifier() == NULL) {
316    *err_ = Err(left.get(), "May only subscript identifiers.",
317        "The thing on the left hand side of the [] must be an identifier\n"
318        "and not an expression. If you need this, you'll have to assign the\n"
319        "value to a temporary before subscripting. Sorry.");
320    return scoped_ptr<ParseNode>();
321  }
322  scoped_ptr<ParseNode> value = ParseExpression();
323  Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
324  scoped_ptr<AccessorNode> accessor(new AccessorNode);
325  accessor->set_base(left->AsIdentifier()->value());
326  accessor->set_index(value.Pass());
327  return accessor.PassAs<ParseNode>();
328}
329
330scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
331                                          Token token) {
332  if (left->AsIdentifier() == NULL) {
333    *err_ = Err(left.get(), "May only use \".\" for identifiers.",
334        "The thing on the left hand side of the dot must be an identifier\n"
335        "and not an expression. If you need this, you'll have to assign the\n"
336        "value to a temporary first. Sorry.");
337    return scoped_ptr<ParseNode>();
338  }
339
340  scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
341  if (!right || !right->AsIdentifier()) {
342    *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
343        "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
344    return scoped_ptr<ParseNode>();
345  }
346
347  scoped_ptr<AccessorNode> accessor(new AccessorNode);
348  accessor->set_base(left->AsIdentifier()->value());
349  accessor->set_member(scoped_ptr<IdentifierNode>(
350      static_cast<IdentifierNode*>(right.release())));
351  return accessor.PassAs<ParseNode>();
352}
353
354// Does not Consume the start or end token.
355scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before,
356                                       bool allow_trailing_comma) {
357  scoped_ptr<ListNode> list(new ListNode);
358  list->set_begin_token(cur_token());
359  bool just_got_comma = false;
360  bool first_time = true;
361  while (!LookAhead(stop_before)) {
362    if (!first_time) {
363      if (!just_got_comma) {
364        // Require commas separate things in lists.
365        *err_ = Err(cur_token(), "Expected comma between items.");
366        return scoped_ptr<ListNode>();
367      }
368    }
369    first_time = false;
370
371    // Why _OR? We're parsing things that are higher precedence than the ,
372    // that separates the items of the list. , should appear lower than
373    // boolean expressions (the lowest of which is OR), but above assignments.
374    list->append_item(ParseExpression(PRECEDENCE_OR));
375    if (has_error())
376      return scoped_ptr<ListNode>();
377    if (at_end()) {
378      *err_ =
379          Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
380      return scoped_ptr<ListNode>();
381    }
382    just_got_comma = Match(Token::COMMA);
383  }
384  if (just_got_comma && !allow_trailing_comma) {
385    *err_ = Err(cur_token(), "Trailing comma");
386    return scoped_ptr<ListNode>();
387  }
388  list->set_end_token(cur_token());
389  return list.Pass();
390}
391
392scoped_ptr<ParseNode> Parser::ParseFile() {
393  scoped_ptr<BlockNode> file(new BlockNode(false));
394  for (;;) {
395    if (at_end())
396      break;
397    scoped_ptr<ParseNode> statement = ParseStatement();
398    if (!statement)
399      break;
400    file->append_statement(statement.Pass());
401  }
402  if (!at_end() && !has_error())
403    *err_ = Err(cur_token(), "Unexpected here, should be newline.");
404  if (has_error())
405    return scoped_ptr<ParseNode>();
406  return file.PassAs<ParseNode>();
407}
408
409scoped_ptr<ParseNode> Parser::ParseStatement() {
410  if (LookAhead(Token::LEFT_BRACE)) {
411    return ParseBlock().PassAs<ParseNode>();
412  } else if (LookAhead(Token::IF)) {
413    return ParseCondition();
414  } else {
415    // TODO(scottmg): Is this too strict? Just drop all the testing if we want
416    // to allow "pointless" expressions and return ParseExpression() directly.
417    scoped_ptr<ParseNode> stmt = ParseExpression();
418    if (stmt) {
419      if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
420        return stmt.Pass();
421    }
422    if (!has_error()) {
423      Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
424      *err_ = Err(token, "Expecting assignment or function call.");
425    }
426    return scoped_ptr<ParseNode>();
427  }
428}
429
430scoped_ptr<BlockNode> Parser::ParseBlock() {
431  Token begin_token =
432      Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
433  if (has_error())
434    return scoped_ptr<BlockNode>();
435  scoped_ptr<BlockNode> block(new BlockNode(true));
436  block->set_begin_token(begin_token);
437
438  for (;;) {
439    if (LookAhead(Token::RIGHT_BRACE)) {
440      block->set_end_token(Consume());
441      break;
442    }
443
444    scoped_ptr<ParseNode> statement = ParseStatement();
445    if (!statement)
446      return scoped_ptr<BlockNode>();
447    block->append_statement(statement.Pass());
448  }
449  return block.Pass();
450}
451
452scoped_ptr<ParseNode> Parser::ParseCondition() {
453  scoped_ptr<ConditionNode> condition(new ConditionNode);
454  Consume(Token::IF, "Expected 'if'");
455  Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
456  condition->set_condition(ParseExpression());
457  if (IsAssignment(condition->condition()))
458    *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
459  Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
460  condition->set_if_true(ParseBlock().Pass());
461  if (Match(Token::ELSE))
462    condition->set_if_false(ParseStatement().Pass());
463  if (has_error())
464    return scoped_ptr<ParseNode>();
465  return condition.PassAs<ParseNode>();
466}
467