1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "tools/gn/pattern.h"
6
7#include "tools/gn/value.h"
8
9namespace {
10
11void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
12  // Set when the last subrange is a literal so we can just append when we
13  // find another literal.
14  Pattern::Subrange* last_literal = NULL;
15
16  for (size_t i = 0; i < s.size(); i++) {
17    if (s[i] == '*') {
18      // Don't allow two **.
19      if (out->size() == 0 ||
20          (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
21        out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
22      last_literal = NULL;
23    } else if (s[i] == '\\') {
24      if (i < s.size() - 1 && s[i + 1] == 'b') {
25        // "\b" means path boundary.
26        i++;
27        out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
28        last_literal = NULL;
29      } else {
30        // Backslash + anything else means that literal char.
31        if (!last_literal) {
32          out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
33          last_literal = &(*out)[out->size() - 1];
34        }
35        if (i < s.size() - 1) {
36          i++;
37          last_literal->literal.push_back(s[i]);
38        } else {
39          // Single backslash at end, use literal backslash.
40          last_literal->literal.push_back('\\');
41        }
42      }
43    } else {
44      if (!last_literal) {
45        out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
46        last_literal = &(*out)[out->size() - 1];
47      }
48      last_literal->literal.push_back(s[i]);
49    }
50  }
51}
52
53}  // namespace
54
55Pattern::Pattern(const std::string& s) {
56  ParsePattern(s, &subranges_);
57  is_suffix_ =
58      (subranges_.size() == 2 &&
59       subranges_[0].type == Subrange::ANYTHING &&
60       subranges_[1].type == Subrange::LITERAL);
61}
62
63Pattern::~Pattern() {
64}
65
66bool Pattern::MatchesString(const std::string& s) const {
67  // Empty pattern matches only empty string.
68  if (subranges_.empty())
69    return s.empty();
70
71  if (is_suffix_) {
72    const std::string& suffix = subranges_[1].literal;
73    if (suffix.size() > s.size())
74      return false;  // Too short.
75    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
76  }
77
78  return RecursiveMatch(s, 0, 0, true);
79}
80
81// We assume the number of ranges is small so recursive is always reasonable.
82// Could be optimized to only be recursive for *.
83bool Pattern::RecursiveMatch(const std::string& s,
84                             size_t begin_char,
85                             size_t subrange_index,
86                             bool allow_implicit_path_boundary) const {
87  if (subrange_index >= subranges_.size()) {
88    // Hit the end of our subranges, the text should also be at the end for a
89    // match.
90    return begin_char == s.size();
91  }
92
93  const Subrange& sr = subranges_[subrange_index];
94  switch (sr.type) {
95    case Subrange::LITERAL: {
96      if (s.size() - begin_char < sr.literal.size())
97        return false;  // Not enough room.
98      if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
99        return false;  // Literal doesn't match.
100
101      // Recursively check the next one.
102      return RecursiveMatch(s, begin_char + sr.literal.size(),
103                            subrange_index + 1, true);
104    }
105
106    case Subrange::PATH_BOUNDARY: {
107      // When we can accept an implicit path boundary, we have to check both
108      // a match of the literal and the implicit one.
109      if (allow_implicit_path_boundary &&
110          (begin_char == 0 || begin_char == s.size())) {
111        // At implicit path boundary, see if the rest of the pattern matches.
112        if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
113          return true;
114      }
115
116      // Check for a literal "/".
117      if (begin_char < s.size() && s[begin_char] == '/') {
118        // At explicit boundary, see if the rest of the pattern matches.
119        if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
120          return true;
121      }
122      return false;
123    }
124
125    case Subrange::ANYTHING: {
126      if (subrange_index == subranges_.size() - 1)
127        return true;  // * at the end, consider it matching.
128
129      size_t min_next_size = sr.MinSize();
130
131      // We don't care about exactly what matched as long as there was a match,
132      // so we can do this front-to-back. If we needed the match, we would
133      // normally want "*" to be greedy so would work backwards.
134      for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
135        // Note: this could probably be faster by detecting the type of the
136        // next match in advance and checking for a match in this loop rather
137        // than doing a full recursive call for each character.
138        if (RecursiveMatch(s, i, subrange_index + 1, true))
139          return true;
140      }
141      return false;
142    }
143
144    default:
145      NOTREACHED();
146  }
147
148  return false;
149}
150
151PatternList::PatternList() {
152}
153
154PatternList::~PatternList() {
155}
156
157void PatternList::Append(const Pattern& pattern) {
158  patterns_.push_back(pattern);
159}
160
161void PatternList::SetFromValue(const Value& v, Err* err) {
162  patterns_.clear();
163
164  if (v.type() != Value::LIST) {
165    *err = Err(v.origin(), "This value must be a list.");
166    return;
167  }
168
169  const std::vector<Value>& list = v.list_value();
170  for (size_t i = 0; i < list.size(); i++) {
171    if (!list[i].VerifyTypeIs(Value::STRING, err))
172      return;
173    patterns_.push_back(Pattern(list[i].string_value()));
174  }
175}
176
177bool PatternList::MatchesString(const std::string& s) const {
178  for (size_t i = 0; i < patterns_.size(); i++) {
179    if (patterns_[i].MatchesString(s))
180      return true;
181  }
182  return false;
183}
184
185bool PatternList::MatchesValue(const Value& v) const {
186  if (v.type() == Value::STRING)
187    return MatchesString(v.string_value());
188  return false;
189}
190