break_iterator.cc revision ca12bfac764ba476d6cd062bf1dde12cc64c3f40
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/break_iterator.h"
6
7#include "base/logging.h"
8#include "third_party/icu/source/common/unicode/ubrk.h"
9#include "third_party/icu/source/common/unicode/uchar.h"
10#include "third_party/icu/source/common/unicode/ustring.h"
11
12namespace base {
13namespace i18n {
14
// Sentinel "no position" value: the largest value a size_t can hold.
// |prev_| starts out as npos, and |pos_| is set to npos once Advance() has
// no further boundary to report.
const size_t npos = static_cast<size_t>(-1);
16
17BreakIterator::BreakIterator(const string16& str, BreakType break_type)
18    : iter_(NULL),
19      string_(str),
20      break_type_(break_type),
21      prev_(npos),
22      pos_(0) {
23}
24
25BreakIterator::~BreakIterator() {
26  if (iter_)
27    ubrk_close(static_cast<UBreakIterator*>(iter_));
28}
29
30bool BreakIterator::Init() {
31  UErrorCode status = U_ZERO_ERROR;
32  UBreakIteratorType break_type;
33  switch (break_type_) {
34    case BREAK_CHARACTER:
35      break_type = UBRK_CHARACTER;
36      break;
37    case BREAK_WORD:
38      break_type = UBRK_WORD;
39      break;
40    case BREAK_LINE:
41    case BREAK_NEWLINE:
42      break_type = UBRK_LINE;
43      break;
44    default:
45      NOTREACHED() << "invalid break_type_";
46      return false;
47  }
48  iter_ = ubrk_open(break_type, NULL,
49                    string_.data(), static_cast<int32_t>(string_.size()),
50                    &status);
51  if (U_FAILURE(status)) {
52    NOTREACHED() << "ubrk_open failed";
53    return false;
54  }
55  // Move the iterator to the beginning of the string.
56  ubrk_first(static_cast<UBreakIterator*>(iter_));
57  return true;
58}
59
60bool BreakIterator::Advance() {
61  int32_t pos;
62  int32_t status;
63  prev_ = pos_;
64  switch (break_type_) {
65    case BREAK_CHARACTER:
66    case BREAK_WORD:
67    case BREAK_LINE:
68      pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
69      if (pos == UBRK_DONE) {
70        pos_ = npos;
71        return false;
72      }
73      pos_ = static_cast<size_t>(pos);
74      return true;
75    case BREAK_NEWLINE:
76      do {
77        pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
78        if (pos == UBRK_DONE)
79          break;
80        pos_ = static_cast<size_t>(pos);
81        status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
82      } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
83      if (pos == UBRK_DONE && prev_ == pos_) {
84        pos_ = npos;
85        return false;
86      }
87      return true;
88    default:
89      NOTREACHED() << "invalid break_type_";
90      return false;
91  }
92}
93
94bool BreakIterator::IsWord() const {
95  int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
96  return (break_type_ == BREAK_WORD && status != UBRK_WORD_NONE);
97}
98
99bool BreakIterator::IsEndOfWord(size_t position) const {
100  if (break_type_ != BREAK_WORD)
101    return false;
102
103  UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
104  UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
105  int32_t status = ubrk_getRuleStatus(iter);
106  return (!!boundary && status != UBRK_WORD_NONE);
107}
108
109bool BreakIterator::IsStartOfWord(size_t position) const {
110  if (break_type_ != BREAK_WORD)
111    return false;
112
113  UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
114  UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
115  ubrk_next(iter);
116  int32_t next_status = ubrk_getRuleStatus(iter);
117  return (!!boundary && next_status != UBRK_WORD_NONE);
118}
119
120string16 BreakIterator::GetString() const {
121  DCHECK(prev_ != npos && pos_ != npos);
122  return string_.substr(prev_, pos_ - prev_);
123}
124
125}  // namespace i18n
126}  // namespace base
127