1d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Use of this source code is governed by a BSD-style license that can be
3d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// found in the LICENSE file.
4d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
5d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/pattern.h"
6d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
7d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/value.h"
8d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
93551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)const char kPattern_Help[] =
103551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "Patterns\n"
113551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  Patterns are VERY limited regular expressions that are used in\n"
123551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  several places.\n"
133551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
143551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  Patterns must match the entire input string to be counted as a match.\n"
153551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  In regular expression parlance, there is an implicit \"^...$\"\n"
163551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  surrounding your input. If you want to match a substring, you need to\n"
173551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  use wildcards at the beginning and end.\n"
183551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
193551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  There are only two special tokens understood by the pattern matcher.\n"
203551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  Everything else is a literal.\n"
213551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
223551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "   * Matches zero or more of any character. It does not depend on the\n"
233551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "     preceeding character (in regular expression parlance it is\n"
243551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "     equivalent to \".*\").\n"
253551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
263551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  \\b Matches a path boundary. This will match the beginning or end of\n"
273551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "     a string, or a slash.\n"
283551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
2968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)    "Examples\n"
3068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)    "\n"
313551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  \"*asdf*\"\n"
323551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "      Matches a string containing \"asdf\" anywhere.\n"
333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  \"asdf\"\n"
353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "      Matches only the exact string \"asdf\".\n"
363551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  \"*.cc\"\n"
383551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "      Matches strings ending in the literal \".cc\".\n"
393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "\n"
403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "  \"\\bwin/*\"\n"
413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    "      Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n";
423551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
43d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochnamespace {
44d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
45d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
46d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  // Set when the last subrange is a literal so we can just append when we
47d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  // find another literal.
48d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  Pattern::Subrange* last_literal = NULL;
49d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
50d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  for (size_t i = 0; i < s.size(); i++) {
51d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    if (s[i] == '*') {
52d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // Don't allow two **.
53d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (out->size() == 0 ||
54d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
55d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
56d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      last_literal = NULL;
57d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    } else if (s[i] == '\\') {
58d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (i < s.size() - 1 && s[i + 1] == 'b') {
59d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // "\b" means path boundary.
60d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        i++;
61d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
62d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        last_literal = NULL;
63d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      } else {
64d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // Backslash + anything else means that literal char.
65d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        if (!last_literal) {
66d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
67d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          last_literal = &(*out)[out->size() - 1];
68d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        }
69d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        if (i < s.size() - 1) {
70d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          i++;
71d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          last_literal->literal.push_back(s[i]);
72d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        } else {
73d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          // Single backslash at end, use literal backslash.
74d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          last_literal->literal.push_back('\\');
75d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        }
76d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      }
77d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    } else {
78d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (!last_literal) {
79d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
80d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        last_literal = &(*out)[out->size() - 1];
81d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      }
82d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      last_literal->literal.push_back(s[i]);
83d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    }
84d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
85d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
86d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
87d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}  // namespace
88d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
89d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPattern::Pattern(const std::string& s) {
90d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  ParsePattern(s, &subranges_);
91d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  is_suffix_ =
92d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      (subranges_.size() == 2 &&
93d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch       subranges_[0].type == Subrange::ANYTHING &&
94d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch       subranges_[1].type == Subrange::LITERAL);
95d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
96d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
97d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPattern::~Pattern() {
98d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
99d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
100d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Pattern::MatchesString(const std::string& s) const {
101d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  // Empty pattern matches only empty string.
102d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  if (subranges_.empty())
103d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    return s.empty();
104d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
105d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  if (is_suffix_) {
106d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    const std::string& suffix = subranges_[1].literal;
107d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    if (suffix.size() > s.size())
108d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      return false;  // Too short.
109d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
110d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
111d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
112d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  return RecursiveMatch(s, 0, 0, true);
113d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
114d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
115d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// We assume the number of ranges is small so recursive is always reasonable.
116d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Could be optimized to only be recursive for *.
117d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Pattern::RecursiveMatch(const std::string& s,
118d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch                             size_t begin_char,
119d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch                             size_t subrange_index,
120d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch                             bool allow_implicit_path_boundary) const {
121d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  if (subrange_index >= subranges_.size()) {
122d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    // Hit the end of our subranges, the text should also be at the end for a
123d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    // match.
124d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    return begin_char == s.size();
125d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
126d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
127d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  const Subrange& sr = subranges_[subrange_index];
128d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  switch (sr.type) {
129d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    case Subrange::LITERAL: {
130d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (s.size() - begin_char < sr.literal.size())
131d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        return false;  // Not enough room.
132d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
133d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        return false;  // Literal doesn't match.
134d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
135d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // Recursively check the next one.
136d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      return RecursiveMatch(s, begin_char + sr.literal.size(),
137d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch                            subrange_index + 1, true);
138d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    }
139d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
140d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    case Subrange::PATH_BOUNDARY: {
141d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // When we can accept an implicit path boundary, we have to check both
142d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // a match of the literal and the implicit one.
143d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (allow_implicit_path_boundary &&
144d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          (begin_char == 0 || begin_char == s.size())) {
145d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // At implicit path boundary, see if the rest of the pattern matches.
146d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
147d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          return true;
148d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      }
149d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
150d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // Check for a literal "/".
151d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (begin_char < s.size() && s[begin_char] == '/') {
152d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // At explicit boundary, see if the rest of the pattern matches.
153d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
154d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          return true;
155d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      }
156d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      return false;
157d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    }
158d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
159d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    case Subrange::ANYTHING: {
160d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      if (subrange_index == subranges_.size() - 1)
161d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        return true;  // * at the end, consider it matching.
162d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
163d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      size_t min_next_size = sr.MinSize();
164d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
165d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // We don't care about exactly what matched as long as there was a match,
166d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // so we can do this front-to-back. If we needed the match, we would
167d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      // normally want "*" to be greedy so would work backwards.
168d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
169d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // Note: this could probably be faster by detecting the type of the
170d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // next match in advance and checking for a match in this loop rather
171d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        // than doing a full recursive call for each character.
172d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch        if (RecursiveMatch(s, i, subrange_index + 1, true))
173d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch          return true;
174d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      }
175d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      return false;
176d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    }
177d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
178d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    default:
179d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      NOTREACHED();
180d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
181d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
182d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  return false;
183d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
184d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
185d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPatternList::PatternList() {
186d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
187d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
188d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPatternList::~PatternList() {
189d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
190d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
191d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid PatternList::SetFromValue(const Value& v, Err* err) {
192d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  patterns_.clear();
193d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
194d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  if (v.type() != Value::LIST) {
195d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    *err = Err(v.origin(), "This value must be a list.");
196d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    return;
197d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
198d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
199d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  const std::vector<Value>& list = v.list_value();
200d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  for (size_t i = 0; i < list.size(); i++) {
201d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    if (!list[i].VerifyTypeIs(Value::STRING, err))
202d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      return;
203d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    patterns_.push_back(Pattern(list[i].string_value()));
204d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
205d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
206d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
207d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool PatternList::MatchesString(const std::string& s) const {
208d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  for (size_t i = 0; i < patterns_.size(); i++) {
209d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    if (patterns_[i].MatchesString(s))
210d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch      return true;
211d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  }
212d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  return false;
213d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
214d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch
215d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool PatternList::MatchesValue(const Value& v) const {
216d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  if (v.type() == Value::STRING)
217d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch    return MatchesString(v.string_value());
218d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch  return false;
219d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}
220