15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/string_search.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h" 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/i18n/unicode/usearch.h" 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 107dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochnamespace base { 117dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochnamespace i18n { 127dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 137dbb3d5cf0c15f500944d211057644d6a2f37371Ben MurdochFixedPatternStringSearchIgnoringCaseAndAccents:: 147dbb3d5cf0c15f500944d211057644d6a2f37371Ben MurdochFixedPatternStringSearchIgnoringCaseAndAccents(const string16& find_this) 157dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch : find_this_(find_this) { 167dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch // usearch_open requires a valid string argument to be searched, even if we 177dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch // want to set it by usearch_setText afterwards. So, supplying a dummy text. 187dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch const string16& dummy = find_this_; 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 217dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch search_ = usearch_open(find_this_.data(), find_this_.size(), 227dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch dummy.data(), dummy.size(), 237dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch uloc_getDefault(), 247dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch NULL, // breakiter 257dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch &status); 267dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch if (U_SUCCESS(status)) { 277dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch UCollator* collator = usearch_getCollator(search_); 287dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch ucol_setStrength(collator, UCOL_PRIMARY); 297dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch usearch_reset(search_); 307dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch } 317dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch} 327dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 337dbb3d5cf0c15f500944d211057644d6a2f37371Ben MurdochFixedPatternStringSearchIgnoringCaseAndAccents:: 347dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch~FixedPatternStringSearchIgnoringCaseAndAccents() { 357dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch if (search_) 367dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch usearch_close(search_); 377dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch} 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 397dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochbool FixedPatternStringSearchIgnoringCaseAndAccents::Search( 407dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch const string16& in_this, size_t* match_index, size_t* match_length) { 417dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch UErrorCode status = U_ZERO_ERROR; 427dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch usearch_setText(search_, in_this.data(), in_this.size(), &status); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Default to basic substring search if usearch fails. According to 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // if either |find_this| or |in_this| are empty. In either case basic 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // substring search will give the correct return value. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!U_SUCCESS(status)) { 497dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch size_t index = in_this.find(find_this_); 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (index == string16::npos) { 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (match_index) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *match_index = index; 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (match_length) 567dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch *match_length = find_this_.size(); 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 617dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch int32_t index = usearch_first(search_, &status); 627dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch if (!U_SUCCESS(status) || index == USEARCH_DONE) 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (match_index) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *match_index = static_cast<size_t>(index); 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (match_length) 677dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch *match_length = static_cast<size_t>(usearch_getMatchedLength(search_)); 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool StringSearchIgnoringCaseAndAccents(const string16& find_this, 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16& in_this, 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t* match_index, 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t* match_length) { 757dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search( 767dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch in_this, match_index, match_length); 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace i18n 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace base 81