1// Copyright 2015 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/pattern.h"
6
7#include "base/third_party/icu/icu_utf.h"
8
9namespace base {
10
11namespace {
12
13static bool IsWildcard(base_icu::UChar32 character) {
14  return character == '*' || character == '?';
15}
16
17// Move the strings pointers to the point where they start to differ.
18template <typename CHAR, typename NEXT>
19static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
20                         const CHAR** string, const CHAR* string_end,
21                         NEXT next) {
22  const CHAR* escape = NULL;
23  while (*pattern != pattern_end && *string != string_end) {
24    if (!escape && IsWildcard(**pattern)) {
25      // We don't want to match wildcard here, except if it's escaped.
26      return;
27    }
28
29    // Check if the escapement char is found. If so, skip it and move to the
30    // next character.
31    if (!escape && **pattern == '\\') {
32      escape = *pattern;
33      next(pattern, pattern_end);
34      continue;
35    }
36
37    // Check if the chars match, if so, increment the ptrs.
38    const CHAR* pattern_next = *pattern;
39    const CHAR* string_next = *string;
40    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
41    if (pattern_char == next(&string_next, string_end) &&
42        pattern_char != CBU_SENTINEL) {
43      *pattern = pattern_next;
44      *string = string_next;
45    } else {
46      // Uh oh, it did not match, we are done. If the last char was an
47      // escapement, that means that it was an error to advance the ptr here,
48      // let's put it back where it was. This also mean that the MatchPattern
49      // function will return false because if we can't match an escape char
50      // here, then no one will.
51      if (escape) {
52        *pattern = escape;
53      }
54      return;
55    }
56
57    escape = NULL;
58  }
59}
60
61template <typename CHAR, typename NEXT>
62static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
63  while (*pattern != end) {
64    if (!IsWildcard(**pattern))
65      return;
66    next(pattern, end);
67  }
68}
69
70template <typename CHAR, typename NEXT>
71static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
72                          const CHAR* pattern, const CHAR* pattern_end,
73                          int depth,
74                          NEXT next) {
75  const int kMaxDepth = 16;
76  if (depth > kMaxDepth)
77    return false;
78
79  // Eat all the matching chars.
80  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
81
82  // If the string is empty, then the pattern must be empty too, or contains
83  // only wildcards.
84  if (eval == eval_end) {
85    EatWildcard(&pattern, pattern_end, next);
86    return pattern == pattern_end;
87  }
88
89  // Pattern is empty but not string, this is not a match.
90  if (pattern == pattern_end)
91    return false;
92
93  // If this is a question mark, then we need to compare the rest with
94  // the current string or the string with one character eaten.
95  const CHAR* next_pattern = pattern;
96  next(&next_pattern, pattern_end);
97  if (pattern[0] == '?') {
98    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
99                      depth + 1, next))
100      return true;
101    const CHAR* next_eval = eval;
102    next(&next_eval, eval_end);
103    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
104                      depth + 1, next))
105      return true;
106  }
107
108  // This is a *, try to match all the possible substrings with the remainder
109  // of the pattern.
110  if (pattern[0] == '*') {
111    // Collapse duplicate wild cards (********** into *) so that the
112    // method does not recurse unnecessarily. http://crbug.com/52839
113    EatWildcard(&next_pattern, pattern_end, next);
114
115    while (eval != eval_end) {
116      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
117                        depth + 1, next))
118        return true;
119      eval++;
120    }
121
122    // We reached the end of the string, let see if the pattern contains only
123    // wildcards.
124    if (eval == eval_end) {
125      EatWildcard(&pattern, pattern_end, next);
126      if (pattern != pattern_end)
127        return false;
128      return true;
129    }
130  }
131
132  return false;
133}
134
135struct NextCharUTF8 {
136  base_icu::UChar32 operator()(const char** p, const char* end) {
137    base_icu::UChar32 c;
138    int offset = 0;
139    CBU8_NEXT(*p, offset, end - *p, c);
140    *p += offset;
141    return c;
142  }
143};
144
145struct NextCharUTF16 {
146  base_icu::UChar32 operator()(const char16** p, const char16* end) {
147    base_icu::UChar32 c;
148    int offset = 0;
149    CBU16_NEXT(*p, offset, end - *p, c);
150    *p += offset;
151    return c;
152  }
153};
154
155}  // namespace
156
157bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
158  return MatchPatternT(eval.data(), eval.data() + eval.size(),
159                       pattern.data(), pattern.data() + pattern.size(),
160                       0, NextCharUTF8());
161}
162
163bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
164  return MatchPatternT(eval.data(), eval.data() + eval.size(),
165                       pattern.data(), pattern.data() + pattern.size(),
166                       0, NextCharUTF16());
167}
168
169}  // namespace base
170