1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_
6#define CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_
7
8#include "base/basictypes.h"
9#include "base/memory/scoped_ptr.h"
10#include "base/strings/string16.h"
11
// Forward declaration: this header only names UTF16CharIterator as the
// pointee of a smart-pointer member, so its full definition is not included.
namespace base {
namespace i18n {

class UTF16CharIterator;

}  // namespace i18n
}  // namespace base
17
18namespace app_list {
19
20// TermBreakIterator breaks terms out of a word. Terms are broken on
21// camel case boundaries and alpha/number boundaries. Numbers are defined
22// as [0-9\.,]+.
23//  e.g.
24//   CamelCase -> Camel, Case
25//   Python2.7 -> Python, 2.7
26class TermBreakIterator {
27 public:
28  // Note that |word| must out live this iterator.
29  explicit TermBreakIterator(const base::string16& word);
30  ~TermBreakIterator();
31
32  // Advance to the next term. Returns false if at the end of the word.
33  bool Advance();
34
35  // Returns the current term, which is the substr of |word_| in range
36  // [prev_, pos_).
37  const base::string16 GetCurrentTerm() const;
38
39  size_t prev() const { return prev_; }
40  size_t pos() const { return pos_; }
41
42  static const size_t npos = -1;
43
44 private:
45  enum State {
46    STATE_START,   // Initial state
47    STATE_NUMBER,  // Current char is a number [0-9\.,].
48    STATE_UPPER,   // Current char is upper case.
49    STATE_LOWER,   // Current char is lower case.
50    STATE_CHAR,    // Current char has no case, e.g. a cjk char.
51    STATE_LAST,
52  };
53
54  // Returns new state for given |ch|.
55  State GetNewState(char16 ch);
56
57  const base::string16& word_;
58  size_t prev_;
59  size_t pos_;
60
61  scoped_ptr<base::i18n::UTF16CharIterator> iter_;
62  State state_;
63
64  DISALLOW_COPY_AND_ASSIGN(TermBreakIterator);
65};
66
67}  // namespace app_list
68
69#endif  // CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_
70