1// Copyright (C) 2014 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "string_compare.h" 16 17#include <libaddressinput/util/basictypes.h> 18 19#include <cassert> 20#include <string> 21 22#include <re2/re2.h> 23 24#include "lru_cache_using_std.h" 25 26// RE2 uses type string, which is not necessarily the same as type std::string. 27// In order to create objects of the correct type, to be able to pass pointers 28// to these objects to RE2, the function that does that is defined inside an 29// unnamed namespace inside the re2 namespace. Oh, my ... 30namespace re2 { 31namespace { 32 33// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function 34// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would 35// be a case-insensitive match to the string. This is far too expensive to do 36// repeatedly, so the function is only ever called through an LRU cache. 37std::string ComputeMinPossibleMatch(const std::string& str) { 38 string min, max; // N.B.: RE2 type string! 39 40 RE2::Options options; 41 options.set_literal(true); 42 options.set_case_sensitive(false); 43 RE2 matcher(str, options); 44 45 bool success = matcher.PossibleMatchRange(&min, &max, str.size()); 46 assert(success); 47 (void)success; // Prevent unused variable if assert() is optimized away. 48 49 return min; 50} 51 52} // namespace 53} // namespace re2 54 55namespace i18n { 56namespace addressinput { 57 58class StringCompare::Impl { 59 enum { MAX_CACHE_SIZE = 1 << 15 }; 60 61 public: 62 Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) { 63 options_.set_literal(true); 64 options_.set_case_sensitive(false); 65 } 66 67 ~Impl() {} 68 69 bool NaturalEquals(const std::string& a, const std::string& b) const { 70 RE2 matcher(b, options_); 71 return RE2::FullMatch(a, matcher); 72 } 73 74 bool NaturalLess(const std::string& a, const std::string& b) const { 75 const std::string& min_a(min_possible_match_(a)); 76 const std::string& min_b(min_possible_match_(b)); 77 return min_a < min_b; 78 } 79 80 private: 81 RE2::Options options_; 82 mutable lru_cache_using_std<std::string, std::string> min_possible_match_; 83 84 DISALLOW_COPY_AND_ASSIGN(Impl); 85}; 86 87StringCompare::StringCompare() : impl_(new Impl) {} 88 89StringCompare::~StringCompare() {} 90 91bool StringCompare::NaturalEquals(const std::string& a, 92 const std::string& b) const { 93 return impl_->NaturalEquals(a, b); 94} 95 96bool StringCompare::NaturalLess(const std::string& a, 97 const std::string& b) const { 98 return impl_->NaturalLess(a, b); 99} 100 101} // namespace addressinput 102} // namespace i18n 103