1b9056914e2627627ffdd615e078a9b6020ab1cf2philip.liard@gmail.com// Copyright (C) 2011 The Libphonenumber Authors
21ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com//
31ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Licensed under the Apache License, Version 2.0 (the "License");
41ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// you may not use this file except in compliance with the License.
51ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// You may obtain a copy of the License at
61ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com//
71ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// http://www.apache.org/licenses/LICENSE-2.0
81ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com//
91ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Unless required by applicable law or agreed to in writing, software
101ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// distributed under the License is distributed on an "AS IS" BASIS,
111ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
121ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// See the License for the specific language governing permissions and
131ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// limitations under the License.
141ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
151ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Author: George Yakovlev
161ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com//         Philippe Liard
171ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
18af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// Note that we don't use features of ICU that depend on std::string (e.g.
19af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// UnicodeString::toUTF8String()) to support clients that build ICU without
20af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// -DU_HAVE_STD_STRING.
21af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com
22384682a45e06c1a6c4956fcf23669ddf23ef336ephilip.liard@gmail.com#include "phonenumbers/regexp_adapter_icu.h"
231ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
24603e7e5f83aad6e45e8d794c604a546936b77a16philip.liard@gmail.com#include <stddef.h>
251ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include <string>
261ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
271ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include <unicode/regex.h>
28af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com#include <unicode/stringpiece.h>
291ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include <unicode/unistr.h>
301ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
31af4a2ce290b619b39c2cb2a682ea4d7746d3fb21philip.liard@gmail.com#include "phonenumbers/base/basictypes.h"
32af4a2ce290b619b39c2cb2a682ea4d7746d3fb21philip.liard@gmail.com#include "phonenumbers/base/logging.h"
33af4a2ce290b619b39c2cb2a682ea4d7746d3fb21philip.liard@gmail.com#include "phonenumbers/base/memory/scoped_ptr.h"
341ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com#include "phonenumbers/default_logger.h"
35af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com#include "phonenumbers/string_byte_sink.h"
361ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
371ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comnamespace i18n {
381ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comnamespace phonenumbers {
391ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
401ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comusing icu::RegexMatcher;
411ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comusing icu::RegexPattern;
421ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comusing icu::UnicodeString;
431ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
441ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comnamespace {
451ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
461ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Converts UnicodeString 'source' to a UTF8-formatted std::string.
471ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comstring UnicodeStringToUtf8String(const UnicodeString& source) {
481ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  string data;
49af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com  StringByteSink sink(&data);
50af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com  source.toUTF8(sink);
511ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  return data;
521ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}
531ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
54af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com// Converts UTF8-formatted std::string 'source' to a UnicodeString.
55af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.comUnicodeString Utf8StringToUnicodeString(const string& source) {
56af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com  // Note that we don't use icu::StringPiece(const string&).
57af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com  return UnicodeString::fromUTF8(
58af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com      icu::StringPiece(source.c_str(), source.size()));
59af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com}
60af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com
611ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}  // namespace
621ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
631ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// Implementation of the abstract classes RegExpInput and RegExp using ICU
641ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// regular expression capabilities.
651ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
661ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// ICU implementation of the RegExpInput abstract class.
671ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comclass IcuRegExpInput : public RegExpInput {
681ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com public:
691ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  explicit IcuRegExpInput(const string& utf8_input)
70af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com      : utf8_input_(Utf8StringToUnicodeString(utf8_input)),
711ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        position_(0) {}
721ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
731ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  virtual ~IcuRegExpInput() {}
741ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
751ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  virtual string ToString() const {
761ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    return UnicodeStringToUtf8String(utf8_input_.tempSubString(position_));
771ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
781ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
791ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  UnicodeString* Data() {
801ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    return &utf8_input_;
811ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
821ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
831ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  // The current start position. For a newly created input, position is 0. Each
841ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  // call to ConsumeRegExp() or RegExp::Consume() advances the position in the
851ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  // case of the successful match to be after the match.
861ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  int position() const {
871ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    return position_;
881ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
891ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
901ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  void set_position(int position) {
911ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    DCHECK(position >= 0 && position <= utf8_input_.length());
921ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    position_ = position;
931ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
941ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
951ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com private:
961ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  UnicodeString utf8_input_;
971ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  int position_;
981ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
991ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  DISALLOW_COPY_AND_ASSIGN(IcuRegExpInput);
1001ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com};
1011ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
1021ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com// ICU implementation of the RegExp abstract class.
1031ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.comclass IcuRegExp : public RegExp {
1041ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com public:
1051ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  explicit IcuRegExp(const string& utf8_regexp) {
1061ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    UParseError parse_error;
1071ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    UErrorCode status = U_ZERO_ERROR;
1081ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    utf8_regexp_.reset(RegexPattern::compile(
109af3adc4377ce5f9eb45ccaf75bb21795701211caphilip.liard@gmail.com        Utf8StringToUnicodeString(utf8_regexp), 0, parse_error, status));
1101ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (U_FAILURE(status)) {
1111ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      // The provided regular expressions should compile correctly.
1121ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      LOG(ERROR) << "Error compiling regular expression: " << utf8_regexp;
1131ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      utf8_regexp_.reset(NULL);
1141ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1151ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
1161ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
1171ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  virtual ~IcuRegExp() {}
1181ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
1191ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  virtual bool Consume(RegExpInput* input_string,
1201ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com                       bool anchor_at_start,
1211ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com                       string* matched_string1,
1221ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com                       string* matched_string2,
1231ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com                       string* matched_string3) const {
1241ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    DCHECK(input_string);
1251ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (!utf8_regexp_.get()) {
1261ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1271ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1281ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    IcuRegExpInput* const input = static_cast<IcuRegExpInput*>(input_string);
1291ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    UErrorCode status = U_ZERO_ERROR;
1301ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    const scoped_ptr<RegexMatcher> matcher(
1311ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        utf8_regexp_->matcher(*input->Data(), status));
1321ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    bool match_succeeded = anchor_at_start
1331ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        ? matcher->lookingAt(input->position(), status)
1341ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        : matcher->find(input->position(), status);
1351ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (!match_succeeded || U_FAILURE(status)) {
1361ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1371ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1381ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    string* const matched_strings[] = {
1391ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      matched_string1, matched_string2, matched_string3
1401ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    };
1411ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    // If less matches than expected - fail.
1421ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    for (size_t i = 0; i < arraysize(matched_strings); ++i) {
1431ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      if (matched_strings[i]) {
1441ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        // Groups are counted from 1 rather than 0.
1451ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        const int group_index = i + 1;
1461ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        if (group_index > matcher->groupCount()) {
1471ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com          return false;
1481ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        }
1491ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        *matched_strings[i] =
1501ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com            UnicodeStringToUtf8String(matcher->group(group_index, status));
1511ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      }
1521ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1531ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    input->set_position(matcher->end(status));
1541ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    return !U_FAILURE(status);
1551ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
1561ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
1571ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  bool Match(const string& input_string,
1581ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com             bool full_match,
1591ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com             string* matched_string) const {
1601ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (!utf8_regexp_.get()) {
1611ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1621ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1631ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    IcuRegExpInput input(input_string);
1641ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    UErrorCode status = U_ZERO_ERROR;
1651ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    const scoped_ptr<RegexMatcher> matcher(
1661ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        utf8_regexp_->matcher(*input.Data(), status));
1671ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    bool match_succeeded = full_match
1681ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        ? matcher->matches(input.position(), status)
1691ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        : matcher->find(input.position(), status);
1701ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (!match_succeeded || U_FAILURE(status)) {
1711ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1721ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1731ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (matcher->groupCount() > 0 && matched_string) {
1741ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      *matched_string = UnicodeStringToUtf8String(matcher->group(1, status));
1751ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1761ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    return !U_FAILURE(status);
1771ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
1781ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
1791ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  bool Replace(string* string_to_process,
1801ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com               bool global,
1811ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com               const string& replacement_string) const {
1821ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    DCHECK(string_to_process);
1831ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (!utf8_regexp_.get()) {
1841ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1851ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
1861ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    IcuRegExpInput input(*string_to_process);
1871ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    UErrorCode status = U_ZERO_ERROR;
1881ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    const scoped_ptr<RegexMatcher> matcher(
1891ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com        utf8_regexp_->matcher(*input.Data(), status));
1901ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    if (U_FAILURE(status)) {
1911ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1921ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
19335bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com
19435bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    UnicodeString output;
19535bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    // We reimplement ReplaceFirst and ReplaceAll such that their behaviour is
19635bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    // consistent with the RE2 reg-ex matcher.
19735bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    if (!matcher->find()) {
1981ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
1991ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
20035bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    matcher->appendReplacement(output,
20135bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com                               Utf8StringToUnicodeString(replacement_string),
20235bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com                               status);
20335bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    if (global) {
20435bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com      // Continue and look for more matches.
20535bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com      while (matcher->find()) {
20635bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com        matcher->appendReplacement(
20735bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com            output,
20835bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com            Utf8StringToUnicodeString(replacement_string),
20935bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com            status);
21035bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com      }
21135bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    }
21235bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com
21335bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    matcher->appendTail(output);
21435bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    if (U_FAILURE(status)) {
2151ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com      return false;
2161ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    }
21735bd393fb78215a9c6dbeb158913def01eb58985lararennie@google.com    const string replaced_string = UnicodeStringToUtf8String(output);
2181ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    *string_to_process = replaced_string;
2191ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com    return true;
2201ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  }
2211ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
2221ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com private:
2231ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  scoped_ptr<RegexPattern> utf8_regexp_;
2241ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
2251ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  DISALLOW_COPY_AND_ASSIGN(IcuRegExp);
2261ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com};
2271ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
228384682a45e06c1a6c4956fcf23669ddf23ef336ephilip.liard@gmail.comRegExpInput* ICURegExpFactory::CreateInput(const string& utf8_input) const {
2291ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  return new IcuRegExpInput(utf8_input);
2301ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}
2311ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
232384682a45e06c1a6c4956fcf23669ddf23ef336ephilip.liard@gmail.comRegExp* ICURegExpFactory::CreateRegExp(const string& utf8_regexp) const {
2331ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com  return new IcuRegExp(utf8_regexp);
2341ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}
2351ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com
2361ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}  // namespace phonenumbers
2371ad5e5bc944bfb46689d87ace2773109cb54f5ephilip.liard@gmail.com}  // namespace i18n
238