1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved.
2a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
3a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// found in the LICENSE file.
4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
5a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "cpp/src/util/canonicalize_string.h"
6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/logging.h"
8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "cpp/include/libaddressinput/util/scoped_ptr.h"
9cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/errorcode.h"
10cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/locid.h"
11cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/unistr.h"
12cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/utypes.h"
13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "third_party/icu/source/i18n/unicode/coll.h"
14a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace i18n {
16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace addressinput {
17a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace {
19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
20a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)class ChromeStringCanonicalizer : public StringCanonicalizer {
21a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) public:
22a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  ChromeStringCanonicalizer()
23a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      : error_code_(U_ZERO_ERROR),
24cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        collator_(
25cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)            icu::Collator::createInstance(
26cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                icu::Locale::getRoot(), error_code_)) {
27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    collator_->setStrength(icu::Collator::PRIMARY);
28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    DCHECK(U_SUCCESS(error_code_));
29a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
30a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
31a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  virtual ~ChromeStringCanonicalizer() {}
32a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
33a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // StringCanonicalizer implementation.
34a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  virtual std::string CanonicalizeString(const std::string& original) {
35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    // Returns a canonical version of the string that can be used for comparing
36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    // strings regardless of diacritics and capitalization.
37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    //    CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    //    CanonicalizeString("Texas") == CanonicalizeString("teXas");
39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    //    CanonicalizeString("Texas") != CanonicalizeString("California");
40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    //
41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    // The output is not human-readable.
42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    //    CanonicalizeString("Texas") != "Texas";
43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    icu::UnicodeString icu_str(
44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        original.c_str(), static_cast<int32_t>(original.length()));
45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
47a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    DCHECK(buffer.get());
48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    int32_t filled_size =
49a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        collator_->getSortKey(icu_str, buffer.get(), buffer_size);
50a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    DCHECK_EQ(buffer_size, filled_size);
51a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return std::string(reinterpret_cast<const char*>(buffer.get()));
52a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
53a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
54a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) private:
55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  UErrorCode error_code_;
56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  scoped_ptr<icu::Collator> collator_;
57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
58a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer);
59a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)};
60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}  // namespace
62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
63a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// static
64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() {
65a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer);
66a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
67a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
68a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}  // namespace addressinput
69a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}  // namespace i18n
70