1// Copyright (C) 2011 The Libphonenumber Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Author: George Yakovlev
16//         Philippe Liard
17
18#include "phonenumbers/regexp_adapter_re2.h"
19
20#include <cstddef>
21#include <string>
22
23#include <re2/re2.h>
24#include <re2/stringpiece.h>
25
26#include "phonenumbers/base/basictypes.h"
27#include "phonenumbers/base/logging.h"
28#include "phonenumbers/stringutil.h"
29
30namespace i18n {
31namespace phonenumbers {
32
33using re2::StringPiece;
34
35// Implementation of RegExpInput abstract class.
36class RE2RegExpInput : public RegExpInput {
37 public:
38  explicit RE2RegExpInput(const string& utf8_input)
39      : string_(utf8_input),
40        utf8_input_(string_) {}
41
42  virtual string ToString() const {
43    return utf8_input_.ToString();
44  }
45
46  StringPiece* Data() {
47    return &utf8_input_;
48  }
49
50 private:
51  // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
52  // copy the string passed in.
53  const string string_;
54  StringPiece utf8_input_;
55};
56
57namespace {
58
59template <typename Function, typename Input>
60bool DispatchRE2Call(Function regex_function,
61                     Input input,
62                     const RE2& regexp,
63                     string* out1,
64                     string* out2,
65                     string* out3) {
66  if (out3) {
67    return regex_function(input, regexp, out1, out2, out3);
68  }
69  if (out2) {
70    return regex_function(input, regexp, out1, out2);
71  }
72  if (out1) {
73    return regex_function(input, regexp, out1);
74  }
75  return regex_function(input, regexp);
76}
77
78// Replaces unescaped dollar-signs with backslashes. Backslashes are deleted
79// when they escape dollar-signs.
80string TransformRegularExpressionToRE2Syntax(const string& regex) {
81  string re2_regex(regex);
82  if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) {
83    return regex;
84  }
85  // If we replaced a dollar sign with a backslash and there are now two
86  // backslashes in the string, we assume that the dollar-sign was previously
87  // escaped and that we need to retain it. To do this, we replace pairs of
88  // backslashes with a dollar sign.
89  GlobalReplaceSubstring("\\\\", "$", &re2_regex);
90  return re2_regex;
91}
92
93}  // namespace
94
95// Implementation of RegExp abstract class.
96class RE2RegExp : public RegExp {
97 public:
98  explicit RE2RegExp(const string& utf8_regexp)
99      : utf8_regexp_(utf8_regexp) {}
100
101  virtual bool Consume(RegExpInput* input_string,
102                       bool anchor_at_start,
103                       string* matched_string1,
104                       string* matched_string2,
105                       string* matched_string3) const {
106    DCHECK(input_string);
107    StringPiece* utf8_input =
108        static_cast<RE2RegExpInput*>(input_string)->Data();
109
110    if (anchor_at_start) {
111      return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_,
112                             matched_string1, matched_string2,
113                             matched_string3);
114    } else {
115      return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_,
116                             matched_string1, matched_string2,
117                             matched_string3);
118    }
119  }
120
121  virtual bool Match(const string& input_string,
122                     bool full_match,
123                     string* matched_string) const {
124    if (full_match) {
125      return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_,
126                             matched_string, NULL, NULL);
127    } else {
128      return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_,
129                             matched_string, NULL, NULL);
130    }
131  }
132
133  virtual bool Replace(string* string_to_process,
134                       bool global,
135                       const string& replacement_string) const {
136    DCHECK(string_to_process);
137    const string re2_replacement_string =
138        TransformRegularExpressionToRE2Syntax(replacement_string);
139    if (global) {
140      return RE2::GlobalReplace(string_to_process, utf8_regexp_,
141                                re2_replacement_string);
142    } else {
143      return RE2::Replace(string_to_process, utf8_regexp_,
144                          re2_replacement_string);
145    }
146  }
147
148 private:
149  RE2 utf8_regexp_;
150};
151
152RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
153  return new RE2RegExpInput(utf8_input);
154}
155
156RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
157  return new RE2RegExp(utf8_regexp);
158}
159
160}  // namespace phonenumbers
161}  // namespace i18n
162