1// Copyright (C) 2011 The Libphonenumber Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Author: George Yakovlev 16// Philippe Liard 17 18#include "phonenumbers/regexp_adapter_re2.h" 19 20#include <cstddef> 21#include <string> 22 23#include <re2/re2.h> 24#include <re2/stringpiece.h> 25 26#include "phonenumbers/base/basictypes.h" 27#include "phonenumbers/base/logging.h" 28#include "phonenumbers/stringutil.h" 29 30namespace i18n { 31namespace phonenumbers { 32 33using re2::StringPiece; 34 35// Implementation of RegExpInput abstract class. 36class RE2RegExpInput : public RegExpInput { 37 public: 38 explicit RE2RegExpInput(const string& utf8_input) 39 : string_(utf8_input), 40 utf8_input_(string_) {} 41 42 virtual string ToString() const { 43 return utf8_input_.ToString(); 44 } 45 46 StringPiece* Data() { 47 return &utf8_input_; 48 } 49 50 private: 51 // string_ holds the string referenced by utf8_input_ as StringPiece doesn't 52 // copy the string passed in. 53 const string string_; 54 StringPiece utf8_input_; 55}; 56 57namespace { 58 59template <typename Function, typename Input> 60bool DispatchRE2Call(Function regex_function, 61 Input input, 62 const RE2& regexp, 63 string* out1, 64 string* out2, 65 string* out3) { 66 if (out3) { 67 return regex_function(input, regexp, out1, out2, out3); 68 } 69 if (out2) { 70 return regex_function(input, regexp, out1, out2); 71 } 72 if (out1) { 73 return regex_function(input, regexp, out1); 74 } 75 return regex_function(input, regexp); 76} 77 78// Replaces unescaped dollar-signs with backslashes. Backslashes are deleted 79// when they escape dollar-signs. 80string TransformRegularExpressionToRE2Syntax(const string& regex) { 81 string re2_regex(regex); 82 if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) { 83 return regex; 84 } 85 // If we replaced a dollar sign with a backslash and there are now two 86 // backslashes in the string, we assume that the dollar-sign was previously 87 // escaped and that we need to retain it. To do this, we replace pairs of 88 // backslashes with a dollar sign. 89 GlobalReplaceSubstring("\\\\", "$", &re2_regex); 90 return re2_regex; 91} 92 93} // namespace 94 95// Implementation of RegExp abstract class. 96class RE2RegExp : public RegExp { 97 public: 98 explicit RE2RegExp(const string& utf8_regexp) 99 : utf8_regexp_(utf8_regexp) {} 100 101 virtual bool Consume(RegExpInput* input_string, 102 bool anchor_at_start, 103 string* matched_string1, 104 string* matched_string2, 105 string* matched_string3) const { 106 DCHECK(input_string); 107 StringPiece* utf8_input = 108 static_cast<RE2RegExpInput*>(input_string)->Data(); 109 110 if (anchor_at_start) { 111 return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_, 112 matched_string1, matched_string2, 113 matched_string3); 114 } else { 115 return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_, 116 matched_string1, matched_string2, 117 matched_string3); 118 } 119 } 120 121 virtual bool Match(const string& input_string, 122 bool full_match, 123 string* matched_string) const { 124 if (full_match) { 125 return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_, 126 matched_string, NULL, NULL); 127 } else { 128 return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_, 129 matched_string, NULL, NULL); 130 } 131 } 132 133 virtual bool Replace(string* string_to_process, 134 bool global, 135 const string& replacement_string) const { 136 DCHECK(string_to_process); 137 const string re2_replacement_string = 138 TransformRegularExpressionToRE2Syntax(replacement_string); 139 if (global) { 140 return RE2::GlobalReplace(string_to_process, utf8_regexp_, 141 re2_replacement_string); 142 } else { 143 return RE2::Replace(string_to_process, utf8_regexp_, 144 re2_replacement_string); 145 } 146 } 147 148 private: 149 RE2 utf8_regexp_; 150}; 151 152RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const { 153 return new RE2RegExpInput(utf8_input); 154} 155 156RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const { 157 return new RE2RegExp(utf8_regexp); 158} 159 160} // namespace phonenumbers 161} // namespace i18n 162