parser.cc revision b3af68b02c9e569be81e13e910d61ce6e979bb19
1// Copyright 2015 Google Inc. All rights reserved
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// +build ignore
16
17#include "parser.h"
18
19#include <stack>
20#include <unordered_map>
21
22#include "ast.h"
23#include "file.h"
24#include "loc.h"
25#include "log.h"
26#include "string_piece.h"
27#include "strutil.h"
28#include "value.h"
29
// Remembers what kind of statement was parsed last, so that a following
// line starting with a tab can be classified: it is treated as a command
// only when the state is something other than NOT_AFTER_RULE.
enum class ParserState {
  // Initial state; also entered after assignments, includes and `define'.
  NOT_AFTER_RULE = 0,
  // The previous statement was parsed as a rule with a ':' separator.
  AFTER_RULE = 1,
  // The previous statement went through rule parsing without a separator.
  MAYBE_AFTER_RULE = 2,
};
35
36class Parser {
37  struct IfState {
38    IfAST* ast;
39    bool is_in_else;
40    int num_nest;
41  };
42
43  typedef void (Parser::*DirectiveHandler)(
44      StringPiece line, StringPiece directive);
45  typedef unordered_map<StringPiece, DirectiveHandler> DirectiveMap;
46
47 public:
48  Parser(StringPiece buf, const char* filename, vector<AST*>* asts)
49      : buf_(buf),
50        state_(ParserState::NOT_AFTER_RULE),
51        asts_(asts),
52        out_asts_(asts),
53        num_if_nest_(0),
54        loc_(filename, 0),
55        fixed_lineno_(false) {
56  }
57
58  Parser(StringPiece buf, const Loc& loc, vector<AST*>* asts)
59      : buf_(buf),
60        state_(ParserState::NOT_AFTER_RULE),
61        asts_(asts),
62        out_asts_(asts),
63        num_if_nest_(0),
64        loc_(loc),
65        fixed_lineno_(true) {
66  }
67
68  ~Parser() {
69  }
70
71  void Parse() {
72    l_ = 0;
73
74    for (l_ = 0; l_ < buf_.size();) {
75      size_t lf_cnt = 0;
76      size_t e = FindEndOfLine(&lf_cnt);
77      if (!fixed_lineno_)
78        loc_.lineno++;
79      StringPiece line(buf_.data() + l_, e - l_);
80      ParseLine(line);
81      if (!fixed_lineno_)
82        loc_.lineno += lf_cnt - 1;
83      if (e == buf_.size())
84        break;
85
86      l_ = e + 1;
87    }
88  }
89
90  static void Init() {
91    make_directives_ = new DirectiveMap;
92    (*make_directives_)["include"] = &Parser::ParseInclude;
93    (*make_directives_)["-include"] = &Parser::ParseInclude;
94    (*make_directives_)["sinclude"] = &Parser::ParseInclude;
95    (*make_directives_)["define"] = &Parser::ParseDefine;
96    (*make_directives_)["ifdef"] = &Parser::ParseIfdef;
97    (*make_directives_)["ifndef"] = &Parser::ParseIfdef;
98    (*make_directives_)["ifeq"] = &Parser::ParseIfeq;
99    (*make_directives_)["ifneq"] = &Parser::ParseIfeq;
100    (*make_directives_)["else"] = &Parser::ParseElse;
101    (*make_directives_)["endif"] = &Parser::ParseEndif;
102
103    else_if_directives_ = new DirectiveMap;
104    (*else_if_directives_)["ifdef"] = &Parser::ParseIfdef;
105    (*else_if_directives_)["ifndef"] = &Parser::ParseIfdef;
106
107    shortest_directive_len_ = 9999;
108    longest_directive_len_ = 0;
109    for (auto p : *make_directives_) {
110      size_t len = p.first.size();
111      shortest_directive_len_ = min(len, shortest_directive_len_);
112      longest_directive_len_ = max(len, longest_directive_len_);
113    }
114  }
115
116  static void Quit() {
117    delete make_directives_;
118  }
119
120 private:
121  void Error(const string& msg) {
122    ERROR("%s:%d: %s", LOCF(loc_), msg.c_str());
123  }
124
125  size_t FindEndOfLine(size_t* lf_cnt) {
126    size_t e = l_;
127    bool prev_backslash = false;
128    for (; e < buf_.size(); e++) {
129      char c = buf_[e];
130      if (c == '\\') {
131        prev_backslash = !prev_backslash;
132      } else if (c == '\n') {
133        ++*lf_cnt;
134        if (!prev_backslash) {
135          return e;
136        }
137      } else if (c != '\r') {
138        prev_backslash = false;
139      }
140    }
141    return e;
142  }
143
144  void ParseLine(StringPiece line) {
145    if (line.empty() || (line.size() == 1 && line[0] == '\r'))
146      return;
147
148    if (!define_name_.empty()) {
149      ParseInsideDefine(line);
150      return;
151    }
152
153    if (line[0] == '\t' && state_ != ParserState::NOT_AFTER_RULE) {
154      CommandAST* ast = new CommandAST();
155      ast->set_loc(loc_);
156      ast->expr = ParseExpr(line.substr(1), ParseExprOpt::COMMAND);
157      out_asts_->push_back(ast);
158      return;
159    }
160
161    line = TrimLeftSpace(line);
162
163    if (line[0] == '#')
164      return;
165
166    if (HandleDirective(line, make_directives_)) {
167      return;
168    }
169
170    size_t sep = FindTwoOutsideParen(line, ':', '=');
171    if (sep == string::npos) {
172      ParseRule(line, sep);
173    } else if (line[sep] == '=') {
174      ParseAssign(line, sep);
175    } else if (line.get(sep+1) == '=') {
176      ParseAssign(line, sep+1);
177    } else if (line[sep] == ':') {
178      ParseRule(line, sep);
179    } else {
180      CHECK(false);
181    }
182  }
183
184  void ParseRule(StringPiece line, size_t sep) {
185    const bool is_rule = sep != string::npos && line[sep] == ':';
186    RuleAST* ast = new RuleAST();
187    ast->set_loc(loc_);
188
189    size_t found = FindTwoOutsideParen(line.substr(sep + 1), '=', ';');
190    if (found != string::npos) {
191      found += sep + 1;
192      ast->term = line[found];
193      ParseExprOpt opt =
194          ast->term == ';' ? ParseExprOpt::COMMAND : ParseExprOpt::NORMAL;
195      ast->after_term = ParseExpr(TrimLeftSpace(line.substr(found + 1)), opt);
196      ast->expr = ParseExpr(TrimSpace(line.substr(0, found)));
197    } else {
198      ast->term = 0;
199      ast->after_term = NULL;
200      ast->expr = ParseExpr(TrimSpace(line));
201    }
202    out_asts_->push_back(ast);
203    state_ = is_rule ? ParserState::AFTER_RULE : ParserState::MAYBE_AFTER_RULE;
204  }
205
206  void ParseAssign(StringPiece line, size_t sep) {
207    if (sep == 0)
208      Error("*** empty variable name ***");
209    StringPiece lhs;
210    StringPiece rhs;
211    AssignOp op;
212    ParseAssignStatement(line, sep, &lhs, &rhs, &op);
213
214    AssignAST* ast = new AssignAST();
215    ast->set_loc(loc_);
216    ast->lhs = ParseExpr(lhs);
217    ast->rhs = ParseExpr(rhs);
218    ast->orig_rhs = rhs;
219    ast->op = op;
220    ast->directive = AssignDirective::NONE;
221    out_asts_->push_back(ast);
222    state_ = ParserState::NOT_AFTER_RULE;
223  }
224
225  void ParseInclude(StringPiece line, StringPiece directive) {
226    IncludeAST* ast = new IncludeAST();
227    ast->set_loc(loc_);
228    ast->expr = ParseExpr(line);
229    ast->should_exist = directive[0] == 'i';
230    out_asts_->push_back(ast);
231    state_ = ParserState::NOT_AFTER_RULE;
232  }
233
234  void ParseDefine(StringPiece line, StringPiece) {
235    if (line.empty()) {
236      Error("*** empty variable name.");
237    }
238    define_name_ = line;
239    define_start_ = 0;
240    define_start_line_ = loc_.lineno;
241    state_ = ParserState::NOT_AFTER_RULE;
242  }
243
244  void ParseInsideDefine(StringPiece line) {
245    line = TrimLeftSpace(line);
246    if (GetDirective(line) != "endef") {
247      if (define_start_ == 0)
248        define_start_ = l_;
249      return;
250    }
251
252    StringPiece rest = TrimRightSpace(RemoveComment(TrimLeftSpace(
253        line.substr(sizeof("endef")))));
254    if (!rest.empty()) {
255      WARN("%s:%d: extraneous text after `endef' directive", LOCF(loc_));
256    }
257
258    AssignAST* ast = new AssignAST();
259    ast->set_loc(Loc(loc_.filename, define_start_line_));
260    ast->lhs = ParseExpr(define_name_);
261    StringPiece rhs;
262    if (define_start_)
263      rhs = TrimRightSpace(buf_.substr(define_start_, l_ - define_start_));
264    ast->rhs = ParseExpr(rhs, ParseExprOpt::DEFINE);
265    ast->orig_rhs = rhs;
266    ast->op = AssignOp::EQ;
267    ast->directive = AssignDirective::NONE;
268    out_asts_->push_back(ast);
269    define_name_.clear();
270  }
271
272  void EnterIf(IfAST* ast) {
273    IfState* st = new IfState();
274    st->ast = ast;
275    st->is_in_else = false;
276    st->num_nest = num_if_nest_;
277    if_stack_.push(st);
278    out_asts_ = &ast->true_asts;
279  }
280
281  void ParseIfdef(StringPiece line, StringPiece directive) {
282    IfAST* ast = new IfAST();
283    ast->set_loc(loc_);
284    ast->op = directive[2] == 'n' ? CondOp::IFNDEF : CondOp::IFDEF;
285    ast->lhs = ParseExpr(line);
286    ast->rhs = NULL;
287    out_asts_->push_back(ast);
288    EnterIf(ast);
289  }
290
291  bool ParseIfEqCond(StringPiece s, IfAST* ast) {
292    if (s.empty()) {
293      return false;
294    }
295
296    if (s[0] == '(' && s[s.size() - 1] == ')') {
297      s = s.substr(1, s.size() - 2);
298      char terms[] = {',', '\0'};
299      size_t n;
300      ast->lhs = ParseExprImpl(s, terms, ParseExprOpt::NORMAL, &n, true);
301      if (s[n] != ',')
302        return false;
303      s = TrimLeftSpace(s.substr(n+1));
304      ast->rhs = ParseExprImpl(s, NULL, ParseExprOpt::NORMAL, &n);
305      s = TrimLeftSpace(s.substr(n));
306    } else {
307      for (int i = 0; i < 2; i++) {
308        if (s.empty())
309          return false;
310        char quote = s[0];
311        if (quote != '\'' && quote != '"')
312          return false;
313        size_t end = s.find(quote, 1);
314        if (end == string::npos)
315          return false;
316        Value* v = ParseExpr(s.substr(1, end - 1), ParseExprOpt::NORMAL);
317        if (i == 0)
318          ast->lhs = v;
319        else
320          ast->rhs = v;
321        s = TrimLeftSpace(s.substr(end+1));
322      }
323    }
324    if (!s.empty()) {
325      Error("extraneous text after `ifeq' directive");
326    }
327    return true;
328  }
329
330  void ParseIfeq(StringPiece line, StringPiece directive) {
331    IfAST* ast = new IfAST();
332    ast->set_loc(loc_);
333    ast->op = directive[2] == 'n' ? CondOp::IFNEQ : CondOp::IFEQ;
334
335    if (!ParseIfEqCond(line, ast)) {
336      Error("*** invalid syntax in conditional.");
337    }
338
339    out_asts_->push_back(ast);
340    EnterIf(ast);
341  }
342
343  void ParseElse(StringPiece line, StringPiece) {
344    CheckIfStack("else");
345    IfState* st = if_stack_.top();
346    if (st->is_in_else)
347      Error("*** only one `else' per conditional.");
348    st->is_in_else = true;
349    out_asts_ = &st->ast->false_asts;
350
351    StringPiece next_if = TrimLeftSpace(line);
352    if (next_if.empty())
353      return;
354
355    num_if_nest_ = st->num_nest + 1;
356    if (!HandleDirective(next_if, else_if_directives_)) {
357      WARN("%s:%d: extraneous text after `else' directive", LOCF(loc_));
358    }
359    num_if_nest_ = 0;
360  }
361
362  void ParseEndif(StringPiece line, StringPiece) {
363    CheckIfStack("endif");
364    if (!line.empty())
365      Error("extraneous text after `endif` directive");
366    IfState st = *if_stack_.top();
367    for (int t = 0; t <= st.num_nest; t++) {
368      delete if_stack_.top();
369      if_stack_.pop();
370      if (if_stack_.empty()) {
371        out_asts_ = asts_;
372      } else {
373        IfState* st = if_stack_.top();
374        if (st->is_in_else)
375          out_asts_ = &st->ast->false_asts;
376        else
377          out_asts_ = &st->ast->true_asts;
378      }
379    }
380  }
381
382  void CheckIfStack(const char* keyword) {
383    if (if_stack_.empty()) {
384      Error(StringPrintf("*** extraneous `%s'.", keyword));
385    }
386  }
387
388  StringPiece RemoveComment(StringPiece line) {
389    size_t i = FindOutsideParen(line, '#');
390    if (i == string::npos)
391      return line;
392    return line.substr(0, i);
393  }
394
395  StringPiece GetDirective(StringPiece line) {
396    if (line.size() < shortest_directive_len_)
397      return StringPiece();
398    StringPiece prefix = line.substr(0, longest_directive_len_ + 1);
399    size_t space_index = prefix.find_first_of(" \t#");
400    return prefix.substr(0, space_index);
401  }
402
403  bool HandleDirective(StringPiece line, const DirectiveMap* directive_map) {
404    StringPiece directive = GetDirective(line);
405    auto found = directive_map->find(directive);
406    if (found == directive_map->end())
407      return false;
408
409    StringPiece rest = TrimRightSpace(RemoveComment(TrimLeftSpace(
410        line.substr(directive.size()))));
411    (this->*found->second)(rest, directive);
412    return true;
413  }
414
415  StringPiece buf_;
416  size_t l_;
417  ParserState state_;
418
419  vector<AST*>* asts_;
420  vector<AST*>* out_asts_;
421
422  StringPiece define_name_;
423  size_t define_start_;
424  int define_start_line_;
425
426  int num_if_nest_;
427  stack<IfState*> if_stack_;
428
429  Loc loc_;
430  bool fixed_lineno_;
431
432  static DirectiveMap* make_directives_;
433  static DirectiveMap* else_if_directives_;
434  static size_t shortest_directive_len_;
435  static size_t longest_directive_len_;
436};
437
438void Parse(Makefile* mk) {
439  Parser parser(StringPiece(mk->buf(), mk->len()),
440                mk->filename().c_str(),
441                mk->mutable_asts());
442  parser.Parse();
443}
444
445void Parse(StringPiece buf, const Loc& loc, vector<AST*>* out_asts) {
446  Parser parser(buf, loc, out_asts);
447  parser.Parse();
448}
449
450void InitParser() {
451  Parser::Init();
452}
453
454void QuitParser() {
455  Parser::Quit();
456}
457
458Parser::DirectiveMap* Parser::make_directives_;
459Parser::DirectiveMap* Parser::else_if_directives_;
460size_t Parser::shortest_directive_len_;
461size_t Parser::longest_directive_len_;
462
463void ParseAssignStatement(StringPiece line, size_t sep,
464                          StringPiece* lhs, StringPiece* rhs, AssignOp* op) {
465  CHECK(sep != 0);
466  *op = AssignOp::EQ;
467  size_t lhs_end = sep;
468  switch (line[sep-1]) {
469    case ':':
470      lhs_end--;
471      *op = AssignOp::COLON_EQ;
472      break;
473    case '+':
474      lhs_end--;
475      *op = AssignOp::PLUS_EQ;
476      break;
477    case '?':
478      lhs_end--;
479      *op = AssignOp::QUESTION_EQ;
480      break;
481  }
482  *lhs = TrimSpace(line.substr(0, lhs_end));
483  *rhs = TrimSpace(line.substr(sep + 1));
484}
485