1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// Use of this source code is governed by a BSD-style license that can be 321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// found in the LICENSE file. 421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "base/i18n/break_iterator.h" 621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "base/logging.h" 821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "unicode/ubrk.h" 921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "unicode/uchar.h" 1021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "unicode/ustring.h" 1121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 1221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsennamespace base { 1321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 1421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenconst size_t npos = -1; 1521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 1621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian MonsenBreakIterator::BreakIterator(const string16* str, BreakType break_type) 1721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen : iter_(NULL), 1821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen string_(str), 1921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break_type_(break_type), 2021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen prev_(npos), 2121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos_(0) { 2221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 2321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 2421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian MonsenBreakIterator::~BreakIterator() { 2521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (iter_) 2621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen ubrk_close(static_cast<UBreakIterator*>(iter_)); 2721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 2821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 2921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool BreakIterator::Init() { 3021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen UErrorCode status = U_ZERO_ERROR; 3121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen UBreakIteratorType break_type; 3221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen switch (break_type_) { 3321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case BREAK_WORD: 3421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break_type = UBRK_WORD; 3521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen case BREAK_LINE: 3721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case BREAK_NEWLINE: 3821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break_type = UBRK_LINE; 3921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 4021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen default: 4121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen NOTREACHED() << "invalid break_type_"; 4221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return false; 4321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 4421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen iter_ = ubrk_open(break_type, NULL, 4521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen string_->data(), static_cast<int32_t>(string_->size()), 4621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen &status); 4721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (U_FAILURE(status)) { 4821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen NOTREACHED() << "ubrk_open failed"; 4921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return false; 5021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 5121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen // Move the iterator to the beginning of the string. 5221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen ubrk_first(static_cast<UBreakIterator*>(iter_)); 5321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return true; 5421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 5521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 5621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool BreakIterator::Advance() { 5721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen int32_t pos; 5821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen int32_t status; 5921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen prev_ = pos_; 6021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen switch (break_type_) { 6121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case BREAK_WORD: 62ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen case BREAK_LINE: 6321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); 6421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (pos == UBRK_DONE) { 6521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos_ = npos; 6621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return false; 6721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 6821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos_ = static_cast<size_t>(pos); 6921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return true; 7021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen case BREAK_NEWLINE: 7121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen do { 7221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); 7321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (pos == UBRK_DONE) { 7421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 7521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 7621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos_ = static_cast<size_t>(pos); 7721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); 7821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); 7921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (pos == UBRK_DONE && prev_ == pos_) { 8021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen pos_ = npos; 8121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return false; 8221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 8321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return true; 8421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen default: 8521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen NOTREACHED() << "invalid break_type_"; 8621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return false; 8721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 8821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 8921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 9021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenbool BreakIterator::IsWord() const { 9121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return (break_type_ == BREAK_WORD && 9221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) != 9321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen UBRK_WORD_NONE); 9421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 9521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 9621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenstring16 BreakIterator::GetString() const { 9721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen DCHECK(prev_ != npos && pos_ != npos); 9821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return string_->substr(prev_, pos_ - prev_); 9921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 10021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 10121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} // namespace base 102