175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// Copyright (C) 2014 Google Inc.
275a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org//
375a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// Licensed under the Apache License, Version 2.0 (the "License");
475a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// you may not use this file except in compliance with the License.
575a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// You may obtain a copy of the License at
675a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org//
775a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// http://www.apache.org/licenses/LICENSE-2.0
875a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org//
975a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// Unless required by applicable law or agreed to in writing, software
1075a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// distributed under the License is distributed on an "AS IS" BASIS,
1175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1275a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// See the License for the specific language governing permissions and
1375a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org// limitations under the License.
1475a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
1575a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org#include "string_compare.h"
1675a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
1775a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org#include <libaddressinput/util/basictypes.h>
1875a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
1990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com#include <cassert>
2075a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org#include <string>
2175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
2290e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com#include <re2/re2.h>
2390e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
2490e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com#include "lru_cache_using_std.h"
2590e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
2690e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// RE2 uses type string, which is not necessarily the same as type std::string.
2790e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// In order to create objects of the correct type, to be able to pass pointers
2890e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// to these objects to RE2, the function that does that is defined inside an
2990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// unnamed namespace inside the re2 namespace. Oh, my ...
3090e624a56973856b6b81112fc1cd815a4224aa96roubert@google.comnamespace re2 {
3190e624a56973856b6b81112fc1cd815a4224aa96roubert@google.comnamespace {
3290e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
3390e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function
3490e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would
3590e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// be a case-insensitive match to the string. This is far too expensive to do
3690e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com// repeatedly, so the function is only ever called through an LRU cache.
3790e624a56973856b6b81112fc1cd815a4224aa96roubert@google.comstd::string ComputeMinPossibleMatch(const std::string& str) {
3890e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  string min, max;  // N.B.: RE2 type string!
3990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
4090e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  RE2::Options options;
4190e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  options.set_literal(true);
4290e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  options.set_case_sensitive(false);
4390e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  RE2 matcher(str, options);
4490e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
4590e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  bool success = matcher.PossibleMatchRange(&min, &max, str.size());
4690e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  assert(success);
4790e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  (void)success;  // Prevent unused variable if assert() is optimized away.
4890e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
4990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  return min;
5090e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com}
5190e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
5290e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com}  // namespace
5390e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com}  // namespace re2
5490e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
5575a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.orgnamespace i18n {
5675a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.orgnamespace addressinput {
5775a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
5875a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.orgclass StringCompare::Impl {
5990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  enum { MAX_CACHE_SIZE = 1 << 15 };
6090e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
6175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org public:
6290e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) {
6375a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org    options_.set_literal(true);
6475a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org    options_.set_case_sensitive(false);
6575a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  }
6675a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
6775a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  ~Impl() {}
6875a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
6975a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  bool NaturalEquals(const std::string& a, const std::string& b) const {
7075a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org    RE2 matcher(b, options_);
7175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org    return RE2::FullMatch(a, matcher);
7275a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  }
7375a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
7490e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  bool NaturalLess(const std::string& a, const std::string& b) const {
7590e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com    const std::string& min_a(min_possible_match_(a));
7690e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com    const std::string& min_b(min_possible_match_(b));
7790e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com    return min_a < min_b;
7890e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  }
7990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
8075a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org private:
8175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  RE2::Options options_;
8290e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  mutable lru_cache_using_std<std::string, std::string> min_possible_match_;
8375a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
8475a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  DISALLOW_COPY_AND_ASSIGN(Impl);
8575a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org};
8675a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
8775a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.orgStringCompare::StringCompare() : impl_(new Impl) {}
8875a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
8975a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.orgStringCompare::~StringCompare() {}
9075a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
9175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.orgbool StringCompare::NaturalEquals(const std::string& a,
9275a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org                                  const std::string& b) const {
9375a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org  return impl_->NaturalEquals(a, b);
9475a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org}
9575a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org
9690e624a56973856b6b81112fc1cd815a4224aa96roubert@google.combool StringCompare::NaturalLess(const std::string& a,
9790e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com                                const std::string& b) const {
9890e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com  return impl_->NaturalLess(a, b);
9990e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com}
10090e624a56973856b6b81112fc1cd815a4224aa96roubert@google.com
10175a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org}  // namespace addressinput
10275a0cfdaa6edd47c9917172fa57701b5d22e83efrouslan@chromium.org}  // namespace i18n
103