1// Copyright (C) 2011 The Libphonenumber Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Author: George Yakovlev
16//         Philippe Liard
17//
18// RegExp adapter to allow a pluggable regexp engine. It has been introduced
19// during the integration of the open-source version of this library into
20// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
21// officially supported on Windows.
22// Since RE2 was initially used in this library, the interface of this adapter
23// is very close to the subset of the RE2 API used in phonenumberutil.cc.
24
25#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
26#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
27
28#include <cstddef>
29#include <string>
30
31namespace i18n {
32namespace phonenumbers {
33
34using std::string;
35
// Abstraction of the input consumed by RegExp::Consume(). Each regexp engine
// wraps its own native representation behind this interface (StringPiece for
// RE2, UnicodeString for ICU Regex), so callers never depend on the concrete
// type.
class RegExpInput {
 public:
  // Virtual so that implementations can be destroyed through a RegExpInput*.
  virtual ~RegExpInput() {}

  // Returns the input converted to a C++ string.
  virtual string ToString() const = 0;
};
46
47// The regular expression abstract class. It supports only functions used in
48// phonenumberutil.cc. Consume(), Match() and Replace() methods must be
49// implemented.
50class RegExp {
51 public:
52  virtual ~RegExp() {}
53
54  // Matches string to regular expression, returns true if expression was
55  // matched, false otherwise, advances position in the match.
56  // input_string - string to be searched.
57  // anchor_at_start - if true, match would be successful only if it appears at
58  // the beginning of the tested region of the string.
59  // matched_string1 - the first string extracted from the match. Can be NULL.
60  // matched_string2 - the second string extracted from the match. Can be NULL.
61  // matched_string3 - the third string extracted from the match. Can be NULL.
62  virtual bool Consume(RegExpInput* input_string,
63                       bool anchor_at_start,
64                       string* matched_string1,
65                       string* matched_string2,
66                       string* matched_string3) const = 0;
67
68  // Helper methods calling the Consume method that assume the match must start
69  // at the beginning.
70  inline bool Consume(RegExpInput* input_string,
71                      string* matched_string1,
72                      string* matched_string2,
73                      string* matched_string3) const {
74    return Consume(input_string, true, matched_string1, matched_string2,
75                   matched_string3);
76  }
77
78  inline bool Consume(RegExpInput* input_string,
79                      string* matched_string1,
80                      string* matched_string2) const {
81    return Consume(input_string, true, matched_string1, matched_string2, NULL);
82  }
83
84  inline bool Consume(RegExpInput* input_string, string* matched_string) const {
85    return Consume(input_string, true, matched_string, NULL, NULL);
86  }
87
88  inline bool Consume(RegExpInput* input_string) const {
89    return Consume(input_string, true, NULL, NULL, NULL);
90  }
91
92  // Helper method calling the Consume method that assumes the match can start
93  // at any place in the string.
94  inline bool FindAndConsume(RegExpInput* input_string,
95                             string* matched_string) const {
96    return Consume(input_string, false, matched_string, NULL, NULL);
97  }
98
99  // Matches string to regular expression, returns true if the expression was
100  // matched, false otherwise.
101  // input_string - string to be searched.
102  // full_match - if true, match would be successful only if it matches the
103  // complete string.
104  // matched_string - the string extracted from the match. Can be NULL.
105  virtual bool Match(const string& input_string,
106                     bool full_match,
107                     string* matched_string) const = 0;
108
109  // Helper methods calling the Match method with the right arguments.
110  inline bool PartialMatch(const string& input_string,
111                           string* matched_string) const {
112    return Match(input_string, false, matched_string);
113  }
114
115  inline bool PartialMatch(const string& input_string) const {
116    return Match(input_string, false, NULL);
117  }
118
119  inline bool FullMatch(const string& input_string,
120                        string* matched_string) const {
121    return Match(input_string, true, matched_string);
122  }
123
124  inline bool FullMatch(const string& input_string) const {
125    return Match(input_string, true, NULL);
126  }
127
128  // Replaces match(es) in 'string_to_process'. If 'global' is true,
129  // replaces all the matches, otherwise only the first match.
130  // replacement_string - text the matches are replaced with. The groups in the
131  // replacement string are referenced with the $[0-9] notation.
132  // Returns true if the pattern matches and a replacement occurs, false
133  // otherwise.
134  virtual bool Replace(string* string_to_process,
135                       bool global,
136                       const string& replacement_string) const = 0;
137
138  // Helper methods calling the Replace method with the right arguments.
139  inline bool Replace(string* string_to_process,
140                      const string& replacement_string) const {
141    return Replace(string_to_process, false, replacement_string);
142  }
143
144  inline bool GlobalReplace(string* string_to_process,
145                            const string& replacement_string) const {
146    return Replace(string_to_process, true, replacement_string);
147  }
148};
149
150// Abstract factory class that lets its subclasses instantiate the classes
151// implementing RegExp and RegExpInput.
152class AbstractRegExpFactory {
153 public:
154  virtual ~AbstractRegExpFactory() {}
155
156  // Creates a new instance of RegExpInput. The deletion of the returned
157  // instance is under the responsibility of the caller.
158  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;
159
160  // Creates a new instance of RegExp. The deletion of the returned instance is
161  // under the responsibility of the caller.
162  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
163};
164
165}  // namespace phonenumbers
166}  // namespace i18n
167
168#endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
169