1// Copyright (C) 2014 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "string_compare.h"
16
17#include <libaddressinput/util/basictypes.h>
18
19#include <cassert>
20#include <string>
21
22#include <re2/re2.h>
23
24#include "lru_cache_using_std.h"
25
26// RE2 uses type string, which is not necessarily the same as type std::string.
27// In order to create objects of the correct type, to be able to pass pointers
28// to these objects to RE2, the function that does that is defined inside an
29// unnamed namespace inside the re2 namespace. Oh, my ...
30namespace re2 {
31namespace {
32
33// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function
34// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would
35// be a case-insensitive match to the string. This is far too expensive to do
36// repeatedly, so the function is only ever called through an LRU cache.
37std::string ComputeMinPossibleMatch(const std::string& str) {
38  string min, max;  // N.B.: RE2 type string!
39
40  RE2::Options options;
41  options.set_literal(true);
42  options.set_case_sensitive(false);
43  RE2 matcher(str, options);
44
45  bool success = matcher.PossibleMatchRange(&min, &max, str.size());
46  assert(success);
47  (void)success;  // Prevent unused variable if assert() is optimized away.
48
49  return min;
50}
51
52}  // namespace
53}  // namespace re2
54
55namespace i18n {
56namespace addressinput {
57
58class StringCompare::Impl {
59  enum { MAX_CACHE_SIZE = 1 << 15 };
60
61 public:
62  Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) {
63    options_.set_literal(true);
64    options_.set_case_sensitive(false);
65  }
66
67  ~Impl() {}
68
69  bool NaturalEquals(const std::string& a, const std::string& b) const {
70    RE2 matcher(b, options_);
71    return RE2::FullMatch(a, matcher);
72  }
73
74  bool NaturalLess(const std::string& a, const std::string& b) const {
75    const std::string& min_a(min_possible_match_(a));
76    const std::string& min_b(min_possible_match_(b));
77    return min_a < min_b;
78  }
79
80 private:
81  RE2::Options options_;
82  mutable lru_cache_using_std<std::string, std::string> min_possible_match_;
83
84  DISALLOW_COPY_AND_ASSIGN(Impl);
85};
86
87StringCompare::StringCompare() : impl_(new Impl) {}
88
89StringCompare::~StringCompare() {}
90
91bool StringCompare::NaturalEquals(const std::string& a,
92                                  const std::string& b) const {
93  return impl_->NaturalEquals(a, b);
94}
95
96bool StringCompare::NaturalLess(const std::string& a,
97                                const std::string& b) const {
98  return impl_->NaturalLess(a, b);
99}
100
101}  // namespace addressinput
102}  // namespace i18n
103