1b9056914e2627627ffdd615e078a9b6020ab1cf2philip.liard@gmail.com// Copyright (C) 2011 The Libphonenumber Authors 21ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// 31ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Licensed under the Apache License, Version 2.0 (the "License"); 41ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// you may not use this file except in compliance with the License. 51ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// You may obtain a copy of the License at 61ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// 71ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// http://www.apache.org/licenses/LICENSE-2.0 81ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// 91ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Unless required by applicable law or agreed to in writing, software 101ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// distributed under the License is distributed on an "AS IS" BASIS, 111ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 121ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// See the License for the specific language governing permissions and 131ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// limitations under the License. 141ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 151ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Author: George Yakovlev 161ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Philippe Liard 171ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 18af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// Note that we don't use features of ICU that depend on std::string (e.g. 19af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// UnicodeString::toUTF8String()) to support clients that build ICU without 20af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// -DU_HAVE_STD_STRING. 21af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com 22384682a45e06c1a6c4956fcf23669ddf23ef336ephilip.liard@gmail.com#include "phonenumbers/regexp_adapter_icu.h" 231ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 24603e7e5f83aad6e45e8d794c604a546936b77a16philip.liard@gmail.com#include <stddef.h> 251ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include <string> 261ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 271ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include <unicode/regex.h> 28af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com#include <unicode/stringpiece.h> 291ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include <unicode/unistr.h> 301ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 31af4a2ce290b619b39c2cb2a682ea4d7746d3fb21philip.liard@gmail.com#include "phonenumbers/base/basictypes.h" 32af4a2ce290b619b39c2cb2a682ea4d7746d3fb21philip.liard@gmail.com#include "phonenumbers/base/logging.h" 33af4a2ce290b619b39c2cb2a682ea4d7746d3fb21philip.liard@gmail.com#include "phonenumbers/base/memory/scoped_ptr.h" 341ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include "phonenumbers/default_logger.h" 35af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com#include "phonenumbers/string_byte_sink.h" 361ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 371ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comnamespace i18n { 381ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comnamespace phonenumbers { 391ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 401ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comusing icu::RegexMatcher; 411ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comusing icu::RegexPattern; 421ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comusing icu::UnicodeString; 431ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 441ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comnamespace { 451ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 461ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Converts UnicodeString 'source' to a UTF8-formatted std::string. 471ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comstring UnicodeStringToUtf8String(const UnicodeString& source) { 481ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com string data; 49af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com StringByteSink sink(&data); 50af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com source.toUTF8(sink); 511ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return data; 521ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com} 531ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 54af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// Converts UTF8-formatted std::string 'source' to a UnicodeString. 55af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.comUnicodeString Utf8StringToUnicodeString(const string& source) { 56af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com // Note that we don't use icu::StringPiece(const string&). 57af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com return UnicodeString::fromUTF8( 58af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com icu::StringPiece(source.c_str(), source.size())); 59af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com} 60af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com 611ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com} // namespace 621ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 631ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Implementation of the abstract classes RegExpInput and RegExp using ICU 641ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// regular expression capabilities. 651ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 661ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// ICU implementation of the RegExpInput abstract class. 671ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comclass IcuRegExpInput : public RegExpInput { 681ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com public: 691ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com explicit IcuRegExpInput(const string& utf8_input) 70af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com : utf8_input_(Utf8StringToUnicodeString(utf8_input)), 711ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com position_(0) {} 721ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 731ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com virtual ~IcuRegExpInput() {} 741ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 751ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com virtual string ToString() const { 761ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return UnicodeStringToUtf8String(utf8_input_.tempSubString(position_)); 771ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 781ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 791ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UnicodeString* Data() { 801ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return &utf8_input_; 811ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 821ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 831ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com // The current start position. For a newly created input, position is 0. Each 841ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com // call to ConsumeRegExp() or RegExp::Consume() advances the position in the 851ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com // case of the successful match to be after the match. 861ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com int position() const { 871ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return position_; 881ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 891ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 901ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com void set_position(int position) { 911ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com DCHECK(position >= 0 && position <= utf8_input_.length()); 921ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com position_ = position; 931ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 941ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 951ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com private: 961ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UnicodeString utf8_input_; 971ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com int position_; 981ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 991ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com DISALLOW_COPY_AND_ASSIGN(IcuRegExpInput); 1001ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}; 1011ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 1021ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// ICU implementation of the RegExp abstract class. 1031ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comclass IcuRegExp : public RegExp { 1041ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com public: 1051ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com explicit IcuRegExp(const string& utf8_regexp) { 1061ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UParseError parse_error; 1071ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UErrorCode status = U_ZERO_ERROR; 1081ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com utf8_regexp_.reset(RegexPattern::compile( 109af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com Utf8StringToUnicodeString(utf8_regexp), 0, parse_error, status)); 1101ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (U_FAILURE(status)) { 1111ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com // The provided regular expressions should compile correctly. 1121ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com LOG(ERROR) << "Error compiling regular expression: " << utf8_regexp; 1131ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com utf8_regexp_.reset(NULL); 1141ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1151ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1161ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 1171ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com virtual ~IcuRegExp() {} 1181ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 1191ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com virtual bool Consume(RegExpInput* input_string, 1201ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool anchor_at_start, 1211ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com string* matched_string1, 1221ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com string* matched_string2, 1231ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com string* matched_string3) const { 1241ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com DCHECK(input_string); 1251ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (!utf8_regexp_.get()) { 1261ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1271ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1281ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com IcuRegExpInput* const input = static_cast<IcuRegExpInput*>(input_string); 1291ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UErrorCode status = U_ZERO_ERROR; 1301ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com const scoped_ptr<RegexMatcher> matcher( 1311ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com utf8_regexp_->matcher(*input->Data(), status)); 1321ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool match_succeeded = anchor_at_start 1331ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com ? matcher->lookingAt(input->position(), status) 1341ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com : matcher->find(input->position(), status); 1351ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (!match_succeeded || U_FAILURE(status)) { 1361ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1371ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1381ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com string* const matched_strings[] = { 1391ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com matched_string1, matched_string2, matched_string3 1401ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com }; 1411ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com // If less matches than expected - fail. 1421ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com for (size_t i = 0; i < arraysize(matched_strings); ++i) { 1431ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (matched_strings[i]) { 1441ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com // Groups are counted from 1 rather than 0. 1451ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com const int group_index = i + 1; 1461ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (group_index > matcher->groupCount()) { 1471ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1481ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1491ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com *matched_strings[i] = 1501ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UnicodeStringToUtf8String(matcher->group(group_index, status)); 1511ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1521ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1531ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com input->set_position(matcher->end(status)); 1541ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return !U_FAILURE(status); 1551ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1561ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 1571ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool Match(const string& input_string, 1581ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool full_match, 1591ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com string* matched_string) const { 1601ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (!utf8_regexp_.get()) { 1611ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1621ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1631ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com IcuRegExpInput input(input_string); 1641ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UErrorCode status = U_ZERO_ERROR; 1651ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com const scoped_ptr<RegexMatcher> matcher( 1661ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com utf8_regexp_->matcher(*input.Data(), status)); 1671ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool match_succeeded = full_match 1681ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com ? matcher->matches(input.position(), status) 1691ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com : matcher->find(input.position(), status); 1701ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (!match_succeeded || U_FAILURE(status)) { 1711ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1721ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1731ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (matcher->groupCount() > 0 && matched_string) { 1741ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com *matched_string = UnicodeStringToUtf8String(matcher->group(1, status)); 1751ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1761ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return !U_FAILURE(status); 1771ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1781ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 1791ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool Replace(string* string_to_process, 1801ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com bool global, 1811ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com const string& replacement_string) const { 1821ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com DCHECK(string_to_process); 1831ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (!utf8_regexp_.get()) { 1841ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1851ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 1861ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com IcuRegExpInput input(*string_to_process); 1871ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com UErrorCode status = U_ZERO_ERROR; 1881ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com const scoped_ptr<RegexMatcher> matcher( 1891ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com utf8_regexp_->matcher(*input.Data(), status)); 1901ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com if (U_FAILURE(status)) { 1911ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1921ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 19335bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com 19435bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com UnicodeString output; 19535bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com // We reimplement ReplaceFirst and ReplaceAll such that their behaviour is 19635bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com // consistent with the RE2 reg-ex matcher. 19735bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com if (!matcher->find()) { 1981ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 1991ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 20035bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com matcher->appendReplacement(output, 20135bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com Utf8StringToUnicodeString(replacement_string), 20235bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com status); 20335bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com if (global) { 20435bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com // Continue and look for more matches. 20535bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com while (matcher->find()) { 20635bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com matcher->appendReplacement( 20735bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com output, 20835bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com Utf8StringToUnicodeString(replacement_string), 20935bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com status); 21035bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com } 21135bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com } 21235bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com 21335bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com matcher->appendTail(output); 21435bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com if (U_FAILURE(status)) { 2151ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return false; 2161ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 21735bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com const string replaced_string = UnicodeStringToUtf8String(output); 2181ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com *string_to_process = replaced_string; 2191ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return true; 2201ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com } 2211ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 2221ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com private: 2231ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com scoped_ptr<RegexPattern> utf8_regexp_; 2241ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 2251ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com DISALLOW_COPY_AND_ASSIGN(IcuRegExp); 2261ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}; 2271ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 228384682a45e06c1a6c4956fcf23669ddf23ef336ephilip.liard@gmail.comRegExpInput* ICURegExpFactory::CreateInput(const string& utf8_input) const { 2291ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return new IcuRegExpInput(utf8_input); 2301ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com} 2311ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 232384682a45e06c1a6c4956fcf23669ddf23ef336ephilip.liard@gmail.comRegExp* ICURegExpFactory::CreateRegExp(const string& utf8_regexp) const { 2331ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com return new IcuRegExp(utf8_regexp); 2341ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com} 2351ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com 2361ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com} // namespace phonenumbers 2371ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com} // namespace i18n 238