// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "ui/app_list/search/term_break_iterator.h"
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/i18n/char_iterator.h"
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/logging.h"
97d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "base/strings/string_util.h"
10ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/common/unicode/uchar.h"
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace app_list {
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
14a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)TermBreakIterator::TermBreakIterator(const base::string16& word)
15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    : word_(word),
16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      prev_(npos),
17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      pos_(0),
18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      iter_(new base::i18n::UTF16CharIterator(&word)),
19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      state_(STATE_START) {
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)TermBreakIterator::~TermBreakIterator() {}
23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool TermBreakIterator::Advance() {
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // 2D matrix that defines term boundaries. Each row represents current state.
26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Each col represents new state from input char. Cells with true value
27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // represents a term boundary.
28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  const bool kBoundary[][STATE_LAST] = {
29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // START  NUMBER UPPER  LOWER  CHAR
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    {  false, false, false, false, false },  // START
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    {  false, false, true,  true,  true },   // NUMBER
32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    {  false, true,  false, false, true },   // UPPER
33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    {  false, true,  true,  false, true },   // LOWER
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    {  false, true,  true,  true,  false },  // CHAR
35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  };
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (iter_->Advance()) {
38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    const State new_state = GetNewState(word_[iter_->array_pos()]);
39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    const bool is_boundary = kBoundary[state_][new_state];
40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    state_ = new_state;
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (is_boundary)
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      break;
43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  prev_ = pos_;
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  pos_ = iter_->array_pos();
47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return prev_ != pos_ || !iter_->end();
49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
51a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)const base::string16 TermBreakIterator::GetCurrentTerm() const {
52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK(prev_ != npos && pos_ != npos);
53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return word_.substr(prev_, pos_ - prev_);
54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)TermBreakIterator::State TermBreakIterator::GetNewState(base::char16 ch) {
57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (IsAsciiDigit(ch) || ch == '.' || ch == ',')
58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return STATE_NUMBER;
59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  const bool is_upper = !!u_isUUppercase(ch);
61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  const bool is_lower = !!u_isULowercase(ch);
62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (is_upper && is_lower) {
64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    NOTREACHED() << "Invalid state for ch=" << ch;
65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return STATE_CHAR;
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (is_upper)
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return STATE_UPPER;
70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (is_lower)
71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return STATE_LOWER;
72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return STATE_CHAR;
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}  // namespace app_list
77