1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved. 2a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// found in the LICENSE file. 4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 5a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "cpp/src/util/canonicalize_string.h" 6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/logging.h" 8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "cpp/include/libaddressinput/util/scoped_ptr.h" 9cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/errorcode.h" 10cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/locid.h" 11cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/unistr.h" 12cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/icu/source/common/unicode/utypes.h" 13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "third_party/icu/source/i18n/unicode/coll.h" 14a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace i18n { 16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace addressinput { 17a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace { 19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 20a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)class ChromeStringCanonicalizer : public StringCanonicalizer { 21a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) public: 22a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ChromeStringCanonicalizer() 23a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) : error_code_(U_ZERO_ERROR), 24cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) collator_( 25cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) icu::Collator::createInstance( 26cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) icu::Locale::getRoot(), error_code_)) { 27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) collator_->setStrength(icu::Collator::PRIMARY); 28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK(U_SUCCESS(error_code_)); 29a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) } 30a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 31a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) virtual ~ChromeStringCanonicalizer() {} 32a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 33a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // StringCanonicalizer implementation. 34a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) virtual std::string CanonicalizeString(const std::string& original) { 35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // Returns a canonical version of the string that can be used for comparing 36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // strings regardless of diacritics and capitalization. 37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); 38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // CanonicalizeString("Texas") == CanonicalizeString("teXas"); 39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // CanonicalizeString("Texas") != CanonicalizeString("California"); 40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // 41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // The output is not human-readable. 42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // CanonicalizeString("Texas") != "Texas"; 43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) icu::UnicodeString icu_str( 44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) original.c_str(), static_cast<int32_t>(original.length())); 45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); 46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); 47a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK(buffer.get()); 48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) int32_t filled_size = 49a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) collator_->getSortKey(icu_str, buffer.get(), buffer_size); 50a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK_EQ(buffer_size, filled_size); 51a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return std::string(reinterpret_cast<const char*>(buffer.get())); 52a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) } 53a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 54a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) private: 55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) UErrorCode error_code_; 56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) scoped_ptr<icu::Collator> collator_; 57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 58a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer); 59a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}; 60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} // namespace 62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 63a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// static 64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() { 65a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer); 66a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 67a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 68a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} // namespace addressinput 69a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} // namespace i18n 70