break_iterator.cc revision 21d179b334e59e9a3bfcaed4c4430bef1bc5759d
1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/break_iterator.h"
6
7#include "base/logging.h"
8#include "unicode/ubrk.h"
9#include "unicode/uchar.h"
10#include "unicode/ustring.h"
11
12namespace base {
13
// Sentinel meaning "no valid position": the largest value size_t can hold.
// prev_ starts out as npos, and pos_ is set to npos once iteration is finished.
const size_t npos = static_cast<size_t>(-1);
15
16BreakIterator::BreakIterator(const string16* str, BreakType break_type)
17    : iter_(NULL),
18      string_(str),
19      break_type_(break_type),
20      prev_(npos),
21      pos_(0) {
22}
23
24BreakIterator::~BreakIterator() {
25  if (iter_)
26    ubrk_close(static_cast<UBreakIterator*>(iter_));
27}
28
29bool BreakIterator::Init() {
30  UErrorCode status = U_ZERO_ERROR;
31  UBreakIteratorType break_type;
32  switch (break_type_) {
33    case BREAK_WORD:
34      break_type = UBRK_WORD;
35      break;
36    case BREAK_SPACE:
37    case BREAK_NEWLINE:
38      break_type = UBRK_LINE;
39      break;
40    default:
41      NOTREACHED() << "invalid break_type_";
42      return false;
43  }
44  iter_ = ubrk_open(break_type, NULL,
45                    string_->data(), static_cast<int32_t>(string_->size()),
46                    &status);
47  if (U_FAILURE(status)) {
48    NOTREACHED() << "ubrk_open failed";
49    return false;
50  }
51  // Move the iterator to the beginning of the string.
52  ubrk_first(static_cast<UBreakIterator*>(iter_));
53  return true;
54}
55
56bool BreakIterator::Advance() {
57  int32_t pos;
58  int32_t status;
59  prev_ = pos_;
60  switch (break_type_) {
61    case BREAK_WORD:
62    case BREAK_SPACE:
63      pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
64      if (pos == UBRK_DONE) {
65        pos_ = npos;
66        return false;
67      }
68      pos_ = static_cast<size_t>(pos);
69      return true;
70    case BREAK_NEWLINE:
71      do {
72        pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
73        if (pos == UBRK_DONE) {
74          break;
75        }
76        pos_ = static_cast<size_t>(pos);
77        status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
78      } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
79      if (pos == UBRK_DONE && prev_ == pos_) {
80        pos_ = npos;
81        return false;
82      }
83      return true;
84    default:
85      NOTREACHED() << "invalid break_type_";
86      return false;
87  }
88}
89
90bool BreakIterator::IsWord() const {
91  return (break_type_ == BREAK_WORD &&
92          ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) !=
93          UBRK_WORD_NONE);
94}
95
96string16 BreakIterator::GetString() const {
97  DCHECK(prev_ != npos && pos_ != npos);
98  return string_->substr(prev_, pos_ - prev_);
99}
100
101}  // namespace base
102