1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "tools/gn/parser.h"
6
7#include "base/logging.h"
8#include "tools/gn/functions.h"
9#include "tools/gn/operators.h"
10#include "tools/gn/token.h"
11
12namespace {
13
14// Returns true if the two tokens are on the same line. We assume they're in
15// the same file.
16bool IsSameLine(const Token& a, const Token& b) {
17  DCHECK(a.location().file() == b.location().file());
18  return a.location().line_number() == b.location().line_number();
19}
20
21}  // namespace
22
23Parser::Parser(const std::vector<Token>& tokens, Err* err)
24    : tokens_(tokens),
25      err_(err),
26      cur_(0) {
27}
28
29Parser::~Parser() {
30}
31
32// static
33scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
34                                    Err* err) {
35  Parser p(tokens, err);
36  return p.ParseBlock(false).PassAs<ParseNode>();
37}
38
39// static
40scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
41                                              Err* err) {
42  Parser p(tokens, err);
43  return p.ParseExpression().Pass();
44}
45
46bool Parser::IsToken(Token::Type type, char* str) const {
47  if (at_end())
48    return false;
49  return cur_token().type() == type || cur_token().value() == str;
50}
51
52scoped_ptr<AccessorNode> Parser::ParseAccessor() {
53  scoped_ptr<AccessorNode> accessor(new AccessorNode);
54
55  DCHECK(cur_token().type() == Token::IDENTIFIER);
56  accessor->set_base(cur_token());
57  cur_++;  // Skip identifier.
58  cur_++;  // Skip "[" (we know this exists because the existance of this
59           // token is how the caller knows it's an accessor.
60
61  if (at_end()) {
62    *err_ = MakeEOFError("Got EOF when looking for list index.");
63    return scoped_ptr<AccessorNode>();
64  }
65
66  // Get the expression.
67  scoped_ptr<ParseNode> expr = ParseExpression().Pass();
68  if (has_error())
69    return scoped_ptr<AccessorNode>();
70  if (at_end()) {
71    *err_ = MakeEOFError("Got EOF when looking for list accessor ]");
72    return scoped_ptr<AccessorNode>();
73  }
74  accessor->set_index(expr.Pass());
75
76  // Skip over "]"
77  if (!cur_token().IsScoperEqualTo("]")) {
78    *err_ = Err(cur_token(), "Expecting ]",
79               "You started a list access but didn't terminate it, and instead "
80               "I fould this\nstupid thing.");
81    return scoped_ptr<AccessorNode>();
82  }
83  cur_++;
84
85  return accessor.Pass();
86}
87
88// Blocks at the file scope don't need {} so we have the option to ignore
89// them. When need_braces is set, we'll expect a begin an end brace.
90//
91// block := "{" block_contents "}"
92// block_contents := (expression | conditional | block)*
93scoped_ptr<BlockNode> Parser::ParseBlock(bool need_braces) {
94  scoped_ptr<BlockNode> block(new BlockNode(true));
95
96  // Eat initial { if necessary.
97  const Token* opening_curly_brace;
98  if (need_braces) {
99    if (at_end()) {
100      *err_ = MakeEOFError("Got EOF when looking for { for block.",
101                           "It should have been after here.");
102      return scoped_ptr<BlockNode>();
103    } else if(!IsScopeBeginScoper(cur_token())) {
104      *err_ = Err(cur_token(), "Expecting { instead of this thing.",
105                  "THOU SHALT USE CURLY BRACES FOR ALL BLOCKS.");
106      return scoped_ptr<BlockNode>();
107    }
108    opening_curly_brace = &cur_token();
109    block->set_begin_token(opening_curly_brace);
110    cur_++;
111  }
112
113  // Loop until EOF or end brace found.
114  while (!at_end() && !IsScopeEndScoper(cur_token())) {
115    if (cur_token().IsIdentifierEqualTo("if")) {
116      // Conditional.
117      block->append_statement(ParseCondition().PassAs<ParseNode>());
118    } else if (IsScopeBeginScoper(cur_token())) {
119      // Nested block.
120      block->append_statement(ParseBlock(true).PassAs<ParseNode>());
121    } else {
122      // Everything else is an expression.
123      block->append_statement(ParseExpression().PassAs<ParseNode>());
124    }
125    if (has_error())
126      return scoped_ptr<BlockNode>();
127  }
128
129  // Eat the ending "}" if necessary.
130  if (need_braces) {
131    if (at_end() || !IsScopeEndScoper(cur_token())) {
132      *err_ = Err(*opening_curly_brace, "Expecting }",
133                  "I ran headlong into the end of the file looking for the "
134                  "closing brace\ncorresponding to this one.");
135      return scoped_ptr<BlockNode>();
136    }
137    block->set_end_token(&cur_token());
138    cur_++;  // Skip past "}".
139  }
140
141  return block.Pass();
142}
143
144// conditional := "if (" expression ")" block [else_conditional]
145// else_conditional := ("else" block) | ("else" conditional)
146scoped_ptr<ConditionNode> Parser::ParseCondition() {
147  scoped_ptr<ConditionNode> cond(new ConditionNode);
148
149  // Skip past "if".
150  const Token& if_token = cur_token();
151  cond->set_if_token(if_token);
152  DCHECK(if_token.IsIdentifierEqualTo("if"));
153  cur_++;
154
155  if (at_end() || !IsFunctionCallArgBeginScoper(cur_token())) {
156    *err_ = Err(if_token, "Expecting \"(\" after \"if\"",
157                "Did you think this was Python or something?");
158    return scoped_ptr<ConditionNode>();
159  }
160
161  // Skip over (.
162  const Token& open_paren_token = cur_token();
163  cur_++;
164  if (at_end()) {
165    *err_ = Err(if_token, "Unexpected EOF inside if condition");
166    return scoped_ptr<ConditionNode>();
167  }
168
169  // Condition inside ().
170  cond->set_condition(ParseExpression().Pass());
171  if (has_error())
172    return scoped_ptr<ConditionNode>();
173
174  if (at_end() || !IsFunctionCallArgEndScoper(cur_token())) {
175    *err_ = Err(open_paren_token, "Expecting \")\" for \"if\" condition",
176                "You didn't finish the thought you started here.");
177    return scoped_ptr<ConditionNode>();
178  }
179  cur_++;  // Skip over )
180
181  // Contents of {}.
182  cond->set_if_true(ParseBlock(true).Pass());
183  if (has_error())
184    return scoped_ptr<ConditionNode>();
185
186  // Optional "else" at the end.
187  if (!at_end() && cur_token().IsIdentifierEqualTo("else")) {
188    cur_++;
189
190    // The else may be followed by an if or a block.
191    if (at_end()) {
192      *err_ = MakeEOFError("Ran into end of file after \"else\".",
193                           "else, WHAT?!?!?");
194      return scoped_ptr<ConditionNode>();
195    }
196    if (cur_token().IsIdentifierEqualTo("if")) {
197      // "else if() {"
198      cond->set_if_false(ParseCondition().PassAs<ParseNode>());
199    } else if (IsScopeBeginScoper(cur_token())) {
200      // "else {"
201      cond->set_if_false(ParseBlock(true).PassAs<ParseNode>());
202    } else {
203      // else <anything else>
204      *err_ = Err(cur_token(), "Expected \"if\" or \"{\" after \"else\".",
205                  "This is neither of those things.");
206      return scoped_ptr<ConditionNode>();
207    }
208  }
209
210  if (has_error())
211    return scoped_ptr<ConditionNode>();
212  return cond.Pass();
213}
214
215// expression := paren_expression | accessor | identifier | literal |
216//               funccall | unary_expression | binary_expression
217//
218// accessor := identifier <non-newline-whitespace>* "[" expression "]"
219//
220// The "non-newline-whitespace is used to differentiate between this case:
221//   a[1]
222// and this one:
223//   a
224//   [1]
225// The second one is kind of stupid (since it does nothing with the values)
226// but is still legal.
227scoped_ptr<ParseNode> Parser::ParseExpression() {
228  scoped_ptr<ParseNode> expr = ParseExpressionExceptBinaryOperators();
229  if (has_error())
230    return scoped_ptr<ParseNode>();
231
232  // That may have hit EOF, in which case we can't have any binary operators.
233  if (at_end())
234    return expr.Pass();
235
236  // TODO(brettw) handle operator precidence!
237  // Gobble up all subsequent expressions as long as there are binary
238  // operators.
239
240  if (IsBinaryOperator(cur_token())) {
241    scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
242    binary_op->set_left(expr.Pass());
243    const Token& operator_token = cur_token();
244    binary_op->set_op(operator_token);
245    cur_++;
246    if (at_end()) {
247      *err_ = Err(operator_token, "Unexpected EOF in expression.",
248                  "I was looking for the right-hand-side of this operator.");
249      return scoped_ptr<ParseNode>();
250    }
251    binary_op->set_right(ParseExpression().Pass());
252    if (has_error())
253      return scoped_ptr<ParseNode>();
254    return binary_op.PassAs<ParseNode>();
255  }
256
257  return expr.Pass();
258}
259
260
261// This internal one does not handle binary operators, since it requires
262// looking at the "next" thing. The regular ParseExpression above handles it.
263scoped_ptr<ParseNode> Parser::ParseExpressionExceptBinaryOperators() {
264  if (at_end())
265    return scoped_ptr<ParseNode>();
266
267  const Token& token = cur_token();
268
269  // Unary expression.
270  if (IsUnaryOperator(token))
271    return ParseUnaryOp().PassAs<ParseNode>();
272
273  // Parenthesized expressions.
274  if (token.IsScoperEqualTo("("))
275    return ParseParenExpression();
276
277  // Function calls.
278  if (token.type() == Token::IDENTIFIER) {
279    if (has_next_token() && IsFunctionCallArgBeginScoper(next_token()))
280      return ParseFunctionCall().PassAs<ParseNode>();
281  }
282
283  // Lists.
284  if (token.IsScoperEqualTo("[")) {
285    return ParseList(Token(Location(), Token::SCOPER, "["),
286                     Token(Location(), Token::SCOPER, "]")).PassAs<ParseNode>();
287  }
288
289  // Literals.
290  if (token.type() == Token::STRING || token.type() == Token::INTEGER) {
291    cur_++;
292    return scoped_ptr<ParseNode>(new LiteralNode(token));
293  }
294
295  // Accessors.
296  if (token.type() == Token::IDENTIFIER &&
297      has_next_token() && next_token().IsScoperEqualTo("[") &&
298      IsSameLine(token, next_token())) {
299    return ParseAccessor().PassAs<ParseNode>();
300  }
301
302  // Identifiers.
303  if (token.type() == Token::IDENTIFIER) {
304    cur_++;
305    return scoped_ptr<ParseNode>(new IdentifierNode(token));
306  }
307
308  // Handle errors.
309  if (token.type() == Token::SEPARATOR) {
310    *err_ = Err(token, "Unexpected comma.",
311                "You can't put a comma here, it must be in list separating "
312                "complete\nthoughts.");
313  } else if (IsScopeBeginScoper(token)) {
314    *err_ = Err(token, "Unexpected token.",
315                "You can't put a \"{\" scope here, it must be in a block.");
316  } else {
317    *err_ = Err(token, "Unexpected token.",
318                "I was really hoping for something else here and you let me down.");
319  }
320  return scoped_ptr<ParseNode>();
321}
322
323// function_call := identifier "(" list_contents ")"
324//                  [<non-newline-whitespace>* block]
325scoped_ptr<FunctionCallNode> Parser::ParseFunctionCall() {
326  scoped_ptr<FunctionCallNode> func(new FunctionCallNode);
327
328  const Token& function_token = cur_token();
329  func->set_function(function_token);
330
331  // This function should only get called when we know we have a function,
332  // which only happens when there is a paren following the name. Skip past it.
333  DCHECK(has_next_token());
334  cur_++;  // Skip past function name to (.
335  const Token& open_paren_token = cur_token();
336  DCHECK(IsFunctionCallArgBeginScoper(open_paren_token));
337
338  if (at_end()) {
339    *err_ = Err(open_paren_token, "Unexpected EOF for function call.",
340                "You didn't finish the thought you started here.");
341    return scoped_ptr<FunctionCallNode>();
342  }
343
344  // Arguments.
345  func->set_args(ParseList(Token(Location(), Token::SCOPER, "("),
346                           Token(Location(), Token::SCOPER, ")")));
347  if (has_error())
348    return scoped_ptr<FunctionCallNode>();
349
350  // Optional {} after function call for certain functions. The "{" must be on
351  // the same line as the ")" to disambiguate the case of a function followed
352  // by a random block just used for scoping purposes.
353  if (!at_end() && IsScopeBeginScoper(cur_token())) {
354    const Token& args_end_token = tokens_[cur_ - 1];
355    DCHECK(args_end_token.IsScoperEqualTo(")"));
356    if (IsSameLine(args_end_token, cur_token()))
357      func->set_block(ParseBlock(true).Pass());
358  }
359
360  if (has_error())
361    return scoped_ptr<FunctionCallNode>();
362  return func.Pass();
363}
364
365// list := "[" expression* "]"
366// list_contents := [(expression ",")* expression [","]]
367//
368// The list_contents is also used in function calls surrounded by parens, so
369// this function takes the tokens that are expected to surround the list.
370scoped_ptr<ListNode> Parser::ParseList(const Token& expected_begin,
371                                       const Token& expected_end) {
372  scoped_ptr<ListNode> list(new ListNode);
373
374  const Token& open_bracket_token = cur_token();
375  list->set_begin_token(open_bracket_token);
376  cur_++;  // Skip "[" or "(".
377
378  bool need_separator = false;
379  while(true) {
380    if (at_end()) {
381      *err_ = Err(open_bracket_token, "EOF found when parsing list.",
382                  "I expected a \"" + expected_end.value().as_string() +
383                  "\" corresponding to this one.");
384      return scoped_ptr<ListNode>();
385    }
386    if (cur_token().type() == expected_end.type() &&
387        cur_token().value() == expected_end.value()) {
388      list->set_end_token(cur_token());
389      cur_++;
390      break;
391    }
392
393    if (need_separator) {
394      DCHECK(!list->contents().empty());
395      LocationRange prev_item_range =
396          list->contents().at(list->contents().size() - 1)->GetRange();
397      *err_ = Err(prev_item_range.end(),
398                  "Need comma separating items in list.",
399                  "You probably need a comma after this thingy.");
400      err_->AppendRange(prev_item_range);
401      return scoped_ptr<ListNode>();
402    }
403    scoped_ptr<ParseNode> expr = ParseExpression().Pass();
404    if (has_error())
405      return scoped_ptr<ListNode>();
406    list->append_item(expr.Pass());
407
408    need_separator = true;
409    if (!at_end()) {
410      // Skip over the separator, marking that we found it.
411      if (cur_token().type() == Token::SEPARATOR) {
412        cur_++;
413        need_separator = false;
414      }
415    }
416  }
417  return list.Pass();
418}
419
420// paren_expression := "(" expression ")"
421scoped_ptr<ParseNode> Parser::ParseParenExpression() {
422  const Token& open_paren_token = cur_token();
423  cur_++;  // Skip over (
424
425  scoped_ptr<ParseNode> ret = ParseExpression();
426  if (has_error())
427    return scoped_ptr<ParseNode>();
428
429  if (at_end()) {
430    *err_ = Err(open_paren_token, "EOF found when parsing expression.",
431                "I was looking for a \")\" corresponding to this one.");
432    return scoped_ptr<ParseNode>();
433  }
434  if (!cur_token().IsScoperEqualTo(")")) {
435    *err_ = Err(open_paren_token, "Expected \")\" for expression",
436                "I was looking for a \")\" corresponding to this one.");
437    return scoped_ptr<ParseNode>();
438  }
439  cur_++;  // Skip over )
440  return ret.Pass();
441}
442
443// unary_expression := "!" expression
444scoped_ptr<UnaryOpNode> Parser::ParseUnaryOp() {
445  scoped_ptr<UnaryOpNode> unary(new UnaryOpNode);
446
447  DCHECK(!at_end() && IsUnaryOperator(cur_token()));
448  const Token& op_token = cur_token();
449  unary->set_op(op_token);
450  cur_++;
451
452  if (at_end()) {
453    *err_ = Err(op_token, "Expected expression.",
454                "This operator needs something to operate on.");
455    return scoped_ptr<UnaryOpNode>();
456  }
457  unary->set_operand(ParseExpression().Pass());
458  if (has_error())
459    return scoped_ptr<UnaryOpNode>();
460  return unary.Pass();
461}
462
463Err Parser::MakeEOFError(const std::string& message,
464                         const std::string& help) const {
465  if (tokens_.empty())
466    return Err(Location(NULL, 1, 1), message, help);
467
468  const Token& last = tokens_[tokens_.size() - 1];
469  return Err(last, message, help);
470}
471