// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/string_search.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/i18n/unicode/usearch.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
107dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochnamespace base {
117dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochnamespace i18n {
127dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
137dbb3d5cf0c15f500944d211057644d6a2f37371Ben MurdochFixedPatternStringSearchIgnoringCaseAndAccents::
147dbb3d5cf0c15f500944d211057644d6a2f37371Ben MurdochFixedPatternStringSearchIgnoringCaseAndAccents(const string16& find_this)
157dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    : find_this_(find_this) {
167dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  // usearch_open requires a valid string argument to be searched, even if we
177dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  // want to set it by usearch_setText afterwards. So, supplying a dummy text.
187dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  const string16& dummy = find_this_;
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UErrorCode status = U_ZERO_ERROR;
217dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  search_ = usearch_open(find_this_.data(), find_this_.size(),
227dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch                         dummy.data(), dummy.size(),
237dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch                         uloc_getDefault(),
247dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch                         NULL,  // breakiter
257dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch                         &status);
267dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  if (U_SUCCESS(status)) {
277dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    UCollator* collator = usearch_getCollator(search_);
287dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    ucol_setStrength(collator, UCOL_PRIMARY);
297dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    usearch_reset(search_);
307dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  }
317dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch}
327dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
337dbb3d5cf0c15f500944d211057644d6a2f37371Ben MurdochFixedPatternStringSearchIgnoringCaseAndAccents::
347dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch~FixedPatternStringSearchIgnoringCaseAndAccents() {
357dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  if (search_)
367dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    usearch_close(search_);
377dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch}
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
397dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochbool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
407dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    const string16& in_this, size_t* match_index, size_t* match_length) {
417dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  UErrorCode status = U_ZERO_ERROR;
427dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  usearch_setText(search_, in_this.data(), in_this.size(), &status);
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Default to basic substring search if usearch fails. According to
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // if either |find_this| or |in_this| are empty. In either case basic
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // substring search will give the correct return value.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!U_SUCCESS(status)) {
497dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    size_t index = in_this.find(find_this_);
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (index == string16::npos) {
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (match_index)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        *match_index = index;
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (match_length)
567dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch        *match_length = find_this_.size();
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
617dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  int32_t index = usearch_first(search_, &status);
627dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  if (!U_SUCCESS(status) || index == USEARCH_DONE)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (match_index)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *match_index = static_cast<size_t>(index);
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (match_length)
677dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    *match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        const string16& in_this,
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        size_t* match_index,
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        size_t* match_length) {
757dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
767dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch      in_this, match_index, match_length);
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace i18n
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace base
81