1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_ 6#define CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_ 7 8#include "base/basictypes.h" 9#include "base/memory/scoped_ptr.h" 10#include "base/strings/string16.h" 11 12namespace base { 13namespace i18n { 14class UTF16CharIterator; 15} 16} 17 18namespace app_list { 19 20// TermBreakIterator breaks terms out of a word. Terms are broken on 21// camel case boundaries and alpha/number boundaries. Numbers are defined 22// as [0-9\.,]+. 23// e.g. 24// CamelCase -> Camel, Case 25// Python2.7 -> Python, 2.7 26class TermBreakIterator { 27 public: 28 // Note that |word| must out live this iterator. 29 explicit TermBreakIterator(const base::string16& word); 30 ~TermBreakIterator(); 31 32 // Advance to the next term. Returns false if at the end of the word. 33 bool Advance(); 34 35 // Returns the current term, which is the substr of |word_| in range 36 // [prev_, pos_). 37 const base::string16 GetCurrentTerm() const; 38 39 size_t prev() const { return prev_; } 40 size_t pos() const { return pos_; } 41 42 static const size_t npos = -1; 43 44 private: 45 enum State { 46 STATE_START, // Initial state 47 STATE_NUMBER, // Current char is a number [0-9\.,]. 48 STATE_UPPER, // Current char is upper case. 49 STATE_LOWER, // Current char is lower case. 50 STATE_CHAR, // Current char has no case, e.g. a cjk char. 51 STATE_LAST, 52 }; 53 54 // Returns new state for given |ch|. 55 State GetNewState(char16 ch); 56 57 const base::string16& word_; 58 size_t prev_; 59 size_t pos_; 60 61 scoped_ptr<base::i18n::UTF16CharIterator> iter_; 62 State state_; 63 64 DISALLOW_COPY_AND_ASSIGN(TermBreakIterator); 65}; 66 67} // namespace app_list 68 69#endif // CHROME_BROWSER_UI_APP_LIST_SEARCH_TERM_BREAK_ITERATOR_H_ 70