1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// Use of this source code is governed by a BSD-style license that can be
321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// found in the LICENSE file.
421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "base/i18n/break_iterator.h"
621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "base/logging.h"
821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "unicode/ubrk.h"
921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "unicode/uchar.h"
1021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "unicode/ustring.h"
1121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
1221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsennamespace base {
1321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
// Sentinel offset meaning "no position": the largest value a size_t can hold.
const size_t npos = static_cast<size_t>(-1);
1521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
1621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian MonsenBreakIterator::BreakIterator(const string16* str, BreakType break_type)
1721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    : iter_(NULL),
1821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      string_(str),
1921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break_type_(break_type),
2021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      prev_(npos),
2121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      pos_(0) {
2221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
2321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
2421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian MonsenBreakIterator::~BreakIterator() {
2521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  if (iter_)
2621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    ubrk_close(static_cast<UBreakIterator*>(iter_));
2721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
2821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
2921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool BreakIterator::Init() {
3021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  UErrorCode status = U_ZERO_ERROR;
3121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  UBreakIteratorType break_type;
3221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  switch (break_type_) {
3321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case BREAK_WORD:
3421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break_type = UBRK_WORD;
3521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    case BREAK_LINE:
3721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case BREAK_NEWLINE:
3821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break_type = UBRK_LINE;
3921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
4021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    default:
4121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      NOTREACHED() << "invalid break_type_";
4221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      return false;
4321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
4421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  iter_ = ubrk_open(break_type, NULL,
4521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                    string_->data(), static_cast<int32_t>(string_->size()),
4621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen                    &status);
4721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  if (U_FAILURE(status)) {
4821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    NOTREACHED() << "ubrk_open failed";
4921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    return false;
5021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
5121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // Move the iterator to the beginning of the string.
5221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  ubrk_first(static_cast<UBreakIterator*>(iter_));
5321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  return true;
5421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
5521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
5621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool BreakIterator::Advance() {
5721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  int32_t pos;
5821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  int32_t status;
5921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  prev_ = pos_;
6021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  switch (break_type_) {
6121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case BREAK_WORD:
62ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    case BREAK_LINE:
6321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
6421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      if (pos == UBRK_DONE) {
6521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        pos_ = npos;
6621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        return false;
6721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      }
6821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      pos_ = static_cast<size_t>(pos);
6921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      return true;
7021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    case BREAK_NEWLINE:
7121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      do {
7221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
7321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        if (pos == UBRK_DONE) {
7421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen          break;
7521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        }
7621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        pos_ = static_cast<size_t>(pos);
7721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
7821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
7921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      if (pos == UBRK_DONE && prev_ == pos_) {
8021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        pos_ = npos;
8121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        return false;
8221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      }
8321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      return true;
8421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    default:
8521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      NOTREACHED() << "invalid break_type_";
8621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      return false;
8721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
8821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
8921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
9021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool BreakIterator::IsWord() const {
9121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  return (break_type_ == BREAK_WORD &&
9221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen          ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) !=
9321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen          UBRK_WORD_NONE);
9421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
9521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
9621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenstring16 BreakIterator::GetString() const {
9721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  DCHECK(prev_ != npos && pos_ != npos);
9821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  return string_->substr(prev_, pos_ - prev_);
9921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
10021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
10121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}  // namespace base
102