1d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Copyright (c) 2013 The Chromium Authors. All rights reserved. 2d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Use of this source code is governed by a BSD-style license that can be 3d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// found in the LICENSE file. 4d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 5d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/pattern.h" 6d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 7d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/value.h" 8d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 93551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)const char kPattern_Help[] = 103551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "Patterns\n" 113551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Patterns are VERY limited regular expressions that are used in\n" 123551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " several places.\n" 133551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 143551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Patterns must match the entire input string to be counted as a match.\n" 153551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " In regular expression parlance, there is an implicit \"^...$\"\n" 163551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " surrounding your input. If you want to match a substring, you need to\n" 173551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " use wildcards at the beginning and end.\n" 183551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 193551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " There are only two special tokens understood by the pattern matcher.\n" 203551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Everything else is a literal.\n" 213551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 223551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " * Matches zero or more of any character. It does not depend on the\n" 233551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " preceeding character (in regular expression parlance it is\n" 243551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " equivalent to \".*\").\n" 253551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 263551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " \\b Matches a path boundary. This will match the beginning or end of\n" 273551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " a string, or a slash.\n" 283551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 2968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) "Examples\n" 3068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) "\n" 313551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " \"*asdf*\"\n" 323551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Matches a string containing \"asdf\" anywhere.\n" 333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " \"asdf\"\n" 353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Matches only the exact string \"asdf\".\n" 363551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " \"*.cc\"\n" 383551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Matches strings ending in the literal \".cc\".\n" 393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "\n" 403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " \"\\bwin/*\"\n" 413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) " Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n"; 423551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 43d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochnamespace { 44d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 45d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) { 46d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Set when the last subrange is a literal so we can just append when we 47d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // find another literal. 48d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Pattern::Subrange* last_literal = NULL; 49d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 50d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch for (size_t i = 0; i < s.size(); i++) { 51d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (s[i] == '*') { 52d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Don't allow two **. 53d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (out->size() == 0 || 54d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING) 55d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING)); 56d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal = NULL; 57d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else if (s[i] == '\\') { 58d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (i < s.size() - 1 && s[i + 1] == 'b') { 59d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // "\b" means path boundary. 60d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch i++; 61d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY)); 62d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal = NULL; 63d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else { 64d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Backslash + anything else means that literal char. 65d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (!last_literal) { 66d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL)); 67d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal = &(*out)[out->size() - 1]; 68d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 69d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (i < s.size() - 1) { 70d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch i++; 71d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal->literal.push_back(s[i]); 72d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else { 73d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Single backslash at end, use literal backslash. 74d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal->literal.push_back('\\'); 75d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 76d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 77d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else { 78d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (!last_literal) { 79d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL)); 80d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal = &(*out)[out->size() - 1]; 81d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 82d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch last_literal->literal.push_back(s[i]); 83d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 84d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 85d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 86d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 87d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} // namespace 88d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 89d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPattern::Pattern(const std::string& s) { 90d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch ParsePattern(s, &subranges_); 91d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch is_suffix_ = 92d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch (subranges_.size() == 2 && 93d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch subranges_[0].type == Subrange::ANYTHING && 94d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch subranges_[1].type == Subrange::LITERAL); 95d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 96d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 97d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPattern::~Pattern() { 98d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 99d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 100d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Pattern::MatchesString(const std::string& s) const { 101d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Empty pattern matches only empty string. 102d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (subranges_.empty()) 103d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return s.empty(); 104d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 105d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (is_suffix_) { 106d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch const std::string& suffix = subranges_[1].literal; 107d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (suffix.size() > s.size()) 108d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; // Too short. 109d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; 110d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 111d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 112d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return RecursiveMatch(s, 0, 0, true); 113d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 114d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 115d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// We assume the number of ranges is small so recursive is always reasonable. 116d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Could be optimized to only be recursive for *. 117d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Pattern::RecursiveMatch(const std::string& s, 118d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t begin_char, 119d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t subrange_index, 120d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool allow_implicit_path_boundary) const { 121d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (subrange_index >= subranges_.size()) { 122d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Hit the end of our subranges, the text should also be at the end for a 123d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // match. 124d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return begin_char == s.size(); 125d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 126d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 127d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch const Subrange& sr = subranges_[subrange_index]; 128d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch switch (sr.type) { 129d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Subrange::LITERAL: { 130d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (s.size() - begin_char < sr.literal.size()) 131d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; // Not enough room. 132d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0) 133d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; // Literal doesn't match. 134d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 135d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Recursively check the next one. 136d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return RecursiveMatch(s, begin_char + sr.literal.size(), 137d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch subrange_index + 1, true); 138d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 139d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 140d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Subrange::PATH_BOUNDARY: { 141d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // When we can accept an implicit path boundary, we have to check both 142d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // a match of the literal and the implicit one. 143d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (allow_implicit_path_boundary && 144d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch (begin_char == 0 || begin_char == s.size())) { 145d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // At implicit path boundary, see if the rest of the pattern matches. 146d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (RecursiveMatch(s, begin_char, subrange_index + 1, false)) 147d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return true; 148d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 149d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 150d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Check for a literal "/". 151d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (begin_char < s.size() && s[begin_char] == '/') { 152d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // At explicit boundary, see if the rest of the pattern matches. 153d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true)) 154d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return true; 155d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 156d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; 157d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 158d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 159d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Subrange::ANYTHING: { 160d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (subrange_index == subranges_.size() - 1) 161d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return true; // * at the end, consider it matching. 162d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 163d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t min_next_size = sr.MinSize(); 164d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 165d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // We don't care about exactly what matched as long as there was a match, 166d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // so we can do this front-to-back. If we needed the match, we would 167d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // normally want "*" to be greedy so would work backwards. 168d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch for (size_t i = begin_char; i < s.size() - min_next_size; i++) { 169d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Note: this could probably be faster by detecting the type of the 170d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // next match in advance and checking for a match in this loop rather 171d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // than doing a full recursive call for each character. 172d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (RecursiveMatch(s, i, subrange_index + 1, true)) 173d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return true; 174d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 175d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; 176d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 177d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 178d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch default: 179d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch NOTREACHED(); 180d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 181d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 182d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; 183d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 184d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 185d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPatternList::PatternList() { 186d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 187d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 188d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochPatternList::~PatternList() { 189d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 190d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 191d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid PatternList::SetFromValue(const Value& v, Err* err) { 192d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch patterns_.clear(); 193d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 194d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (v.type() != Value::LIST) { 195d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch *err = Err(v.origin(), "This value must be a list."); 196d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return; 197d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 198d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 199d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch const std::vector<Value>& list = v.list_value(); 200d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch for (size_t i = 0; i < list.size(); i++) { 201d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (!list[i].VerifyTypeIs(Value::STRING, err)) 202d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return; 203d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch patterns_.push_back(Pattern(list[i].string_value())); 204d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 205d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 206d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 207d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool PatternList::MatchesString(const std::string& s) const { 208d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch for (size_t i = 0; i < patterns_.size(); i++) { 209d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (patterns_[i].MatchesString(s)) 210d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return true; 211d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 212d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; 213d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 214d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 215d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool PatternList::MatchesValue(const Value& v) const { 216d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (v.type() == Value::STRING) 217d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return MatchesString(v.string_value()); 218d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; 219d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 220