WordBreakerTests.cpp revision d8917c69a9f7b7ca52f7ac850922dab4322113f5
157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien/* 257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * Copyright (C) 2015 The Android Open Source Project 357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * 457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * Licensed under the Apache License, Version 2.0 (the "License"); 557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * you may not use this file except in compliance with the License. 657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * You may obtain a copy of the License at 757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * 857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * http://www.apache.org/licenses/LICENSE-2.0 957b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * 1057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * Unless required by applicable law or agreed to in writing, software 1157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * distributed under the License is distributed on an "AS IS" BASIS, 1257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * See the License for the specific language governing permissions and 1457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien * limitations under the License. 1557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien */ 1657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 1757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include <gtest/gtest.h> 1857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include "ICUTestBase.h" 1957b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include "UnicodeUtils.h" 2057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include <minikin/WordBreaker.h> 2157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include <unicode/locid.h> 2257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include <unicode/uclean.h> 2357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include <unicode/udata.h> 2457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 2557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#define LOG_TAG "Minikin" 2657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#include <cutils/log.h> 2757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 2857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#ifndef NELEM 2957b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#define NELEM(x) ((sizeof(x) / sizeof((x)[0]))) 3057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien#endif 3157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 3256840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien#define UTF16(codepoint) U16_LEAD(codepoint), U16_TRAIL(codepoint) 3356840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien 3457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levienusing namespace android; 3557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 3657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levientypedef ICUTestBase WordBreakerTest; 3757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 3857b6dae9894b9362ef04517ff477fd491f9d433bRaph LevienTEST_F(WordBreakerTest, basic) { 3957b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien uint16_t buf[] = {'h', 'e', 'l', 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'}; 4057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien WordBreaker breaker; 4157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 4257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien breaker.setText(buf, NELEM(buf)); 4357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(0, breaker.current()); 4457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(6, breaker.next()); // after "hello " 4557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(0, breaker.wordStart()); // "hello" 4657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(5, breaker.wordEnd()); 47c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 4857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(6, breaker.current()); 4957b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 5057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(6, breaker.wordStart()); // "world" 5157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(11, breaker.wordEnd()); 52c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 5357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(11, breaker.current()); 5457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien} 5557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 5657b6dae9894b9362ef04517ff477fd491f9d433bRaph LevienTEST_F(WordBreakerTest, softHyphen) { 5757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien uint16_t buf[] = {'h', 'e', 'l', 0x00AD, 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'}; 5857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien WordBreaker breaker; 5957b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 6057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien breaker.setText(buf, NELEM(buf)); 6157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(0, breaker.current()); 6257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(7, breaker.next()); // after "hel{SOFT HYPHEN}lo " 6357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(0, breaker.wordStart()); // "hel{SOFT HYPHEN}lo" 6457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(6, breaker.wordEnd()); 65c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 6657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 6757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(7, breaker.wordStart()); // "world" 6857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(12, breaker.wordEnd()); 69c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 7057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien} 7157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien 72d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh PournaderTEST_F(WordBreakerTest, postfixAndPrefix) { 73d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader uint16_t buf[] = {'U', 'S', 0x00A2, ' ', 'J', 'P', 0x00A5}; // US¢ JP¥ 74d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader WordBreaker breaker; 75d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader breaker.setLocale(icu::Locale::getEnglish()); 76d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader breaker.setText(buf, NELEM(buf)); 77d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ(0, breaker.current()); 78d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader 79d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ(4, breaker.next()); // after CENT SIGN 80d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ(0, breaker.wordStart()); // "US¢" 81d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ(3, breaker.wordEnd()); 82d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader 83d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string 84d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ(4, breaker.wordStart()); // "JP¥" 85d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); 86d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader} 87d8917c69a9f7b7ca52f7ac850922dab4322113f5Roozbeh Pournader 88d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph LevienTEST_F(WordBreakerTest, zwjEmojiSequences) { 89d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien uint16_t buf[] = { 90d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien // man + zwj + heart + zwj + man 9156840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien UTF16(0x1F468), 0x200D, 0x2764, 0x200D, UTF16(0x1F468), 9256840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien // woman + zwj + heart + zwj + kiss mark + zwj + woman 9356840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien UTF16(0x1F469), 0x200D, 0x2764, 0x200D, UTF16(0x1F48B), 0x200D, UTF16(0x1F469), 94d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien // eye + zwj + left speech bubble 9556840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien UTF16(0x1F441), 0x200D, UTF16(0x1F5E8), 96d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien }; 97d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien WordBreaker breaker; 98d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 99d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien breaker.setText(buf, NELEM(buf)); 100d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(0, breaker.current()); 101d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(7, breaker.next()); // after man + zwj + heart + zwj + man 102d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(0, breaker.wordStart()); 103d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(7, breaker.wordEnd()); 104d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(17, breaker.next()); // after woman + zwj + heart + zwj + woman 105d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(7, breaker.wordStart()); 106d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(17, breaker.wordEnd()); 107d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 108d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(17, breaker.wordStart()); 109d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien EXPECT_EQ(22, breaker.wordEnd()); 110d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien} 111d3f45892c721fb1738bf02fe19a5143a320ca4bfRaph Levien 11256840e8006ca2b822adb401fc8a65f3c075cde10Raph LevienTEST_F(WordBreakerTest, emojiWithModifier) { 11356840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien uint16_t buf[] = { 11456840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien UTF16(0x1F466), UTF16(0x1F3FB), // boy + type 1-2 fitzpatrick modifier 11556840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien 0x270C, 0xFE0F, UTF16(0x1F3FF) // victory hand + emoji style + type 6 fitzpatrick modifier 11656840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien }; 11756840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien WordBreaker breaker; 11856840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien breaker.setLocale(icu::Locale::getEnglish()); 11956840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien breaker.setText(buf, NELEM(buf)); 12056840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ(0, breaker.current()); 12156840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ(4, breaker.next()); // after man + type 6 fitzpatrick modifier 12256840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ(0, breaker.wordStart()); 12356840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ(4, breaker.wordEnd()); 12456840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 12556840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ(4, breaker.wordStart()); 12656840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien EXPECT_EQ(8, breaker.wordEnd()); 12756840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien} 12856840e8006ca2b822adb401fc8a65f3c075cde10Raph Levien 12957b6dae9894b9362ef04517ff477fd491f9d433bRaph LevienTEST_F(WordBreakerTest, punct) { 13057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien uint16_t buf[] = {0x00A1, 0x00A1, 'h', 'e', 'l', 'l' ,'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', 13157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien '!', '!'}; 13257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien WordBreaker breaker; 13357b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 13457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien breaker.setText(buf, NELEM(buf)); 13557b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(0, breaker.current()); 13657b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(9, breaker.next()); // after "¡¡hello, " 13757b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(2, breaker.wordStart()); // "hello" 13857b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(7, breaker.wordEnd()); 139c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 14057b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 14157b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(9, breaker.wordStart()); // "world" 14257b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien EXPECT_EQ(14, breaker.wordEnd()); 143c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 14457b6dae9894b9362ef04517ff477fd491f9d433bRaph Levien} 1459c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 1469c4cc648abcae144f3b99d612e58ef01d5e52cceRaph LevienTEST_F(WordBreakerTest, email) { 1479c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', 1489c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien ' ', 'x'}; 1499c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien WordBreaker breaker; 1509c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 1519c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setText(buf, NELEM(buf)); 1529c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(0, breaker.current()); 1536d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(11, breaker.next()); // after "foo@example" 1546d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 155c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 1566d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(16, breaker.next()); // after ".com " 1579c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 158c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 1599c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 1609c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(16, breaker.wordStart()); // "x" 1619c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(17, breaker.wordEnd()); 162c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 1639c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien} 1649c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 1659c4cc648abcae144f3b99d612e58ef01d5e52cceRaph LevienTEST_F(WordBreakerTest, mailto) { 1669c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien uint16_t buf[] = {'m', 'a', 'i', 'l', 't', 'o', ':', 'f', 'o', 'o', '@', 1679c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', ' ', 'x'}; 1689c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien WordBreaker breaker; 1699c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 1709c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setText(buf, NELEM(buf)); 1719c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(0, breaker.current()); 1726d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(7, breaker.next()); // after "mailto:" 1736d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 174c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 1756d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(18, breaker.next()); // after "foo@example" 1766d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 177c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 1786d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(23, breaker.next()); // after ".com " 1799c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 180c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 1819c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 1829c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(23, breaker.wordStart()); // "x" 1839c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(24, breaker.wordEnd()); 184c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 1859c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien} 1869c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 1876d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien// The current logic always places a line break after a detected email address or URL 1886d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien// and an immediately following non-ASCII character. 1899c4cc648abcae144f3b99d612e58ef01d5e52cceRaph LevienTEST_F(WordBreakerTest, emailNonAscii) { 1909c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', 1919c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 0x4E00}; 1929c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien WordBreaker breaker; 1939c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 1949c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setText(buf, NELEM(buf)); 1959c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(0, breaker.current()); 1966d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(11, breaker.next()); // after "foo@example" 1976d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 198c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 1996d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(15, breaker.next()); // after ".com" 2009c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 201c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2029c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 2039c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(15, breaker.wordStart()); // "一" 2049c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(16, breaker.wordEnd()); 205c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2069c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien} 2079c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 2089c4cc648abcae144f3b99d612e58ef01d5e52cceRaph LevienTEST_F(WordBreakerTest, emailCombining) { 2099c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', 2109c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 0x0303, ' ', 'x'}; 2119c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien WordBreaker breaker; 2129c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 2139c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setText(buf, NELEM(buf)); 2149c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(0, breaker.current()); 2156d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(11, breaker.next()); // after "foo@example" 2166d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 217c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2186d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(17, breaker.next()); // after ".com̃ " 2199c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 220c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2219c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 2229c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(17, breaker.wordStart()); // "x" 2239c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(18, breaker.wordEnd()); 224c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2259c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien} 2269c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien 2276d15657e4a3826d4d47d5358f1dde211484527e9Raph LevienTEST_F(WordBreakerTest, lonelyAt) { 2286d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien uint16_t buf[] = {'a', ' ', '@', ' ', 'b'}; 2296d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien WordBreaker breaker; 2306d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setLocale(icu::Locale::getEnglish()); 2316d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setText(buf, NELEM(buf)); 2326d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(0, breaker.current()); 2336d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(2, breaker.next()); // after "a " 2346d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(0, breaker.wordStart()); // "a" 2356d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(1, breaker.wordEnd()); 236c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2376d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(4, breaker.next()); // after "@ " 2386d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 239c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2406d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 2416d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(4, breaker.wordStart()); // "b" 2426d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(5, breaker.wordEnd()); 243c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2446d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien} 2456d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien 2469c4cc648abcae144f3b99d612e58ef01d5e52cceRaph LevienTEST_F(WordBreakerTest, url) { 2479c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'e', 'x', 'a', 'm', 'p', 'l', 'e', 2489c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien '.', 'c', 'o', 'm', ' ', 'x'}; 2499c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien WordBreaker breaker; 2509c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setLocale(icu::Locale::getEnglish()); 2519c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien breaker.setText(buf, NELEM(buf)); 2529c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(0, breaker.current()); 2536d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(5, breaker.next()); // after "http:" 2546d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 255c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2566d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(7, breaker.next()); // after "//" 2576d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 258c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2596d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(14, breaker.next()); // after "example" 2606d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 261c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2626d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(19, breaker.next()); // after ".com " 2639c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 264c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2659c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 2669c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(19, breaker.wordStart()); // "x" 2679c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien EXPECT_EQ(20, breaker.wordEnd()); 268c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 2699c4cc648abcae144f3b99d612e58ef01d5e52cceRaph Levien} 2706d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien 2716d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien// Breaks according to section 14.12 of Chicago Manual of Style, *URLs or DOIs and line breaks* 2726d15657e4a3826d4d47d5358f1dde211484527e9Raph LevienTEST_F(WordBreakerTest, urlBreakChars) { 2736d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '.', 'b', '/', '~', 'c', ',', 'd', 2746d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien '-', 'e', '?', 'f', '=', 'g', '&', 'h', '#', 'i', '%', 'j', '_', 'k', '/', 'l'}; 2756d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien WordBreaker breaker; 2766d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setLocale(icu::Locale::getEnglish()); 2776d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setText(buf, NELEM(buf)); 2786d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(0, breaker.current()); 2796d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(5, breaker.next()); // after "http:" 2806d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 281c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2826d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(7, breaker.next()); // after "//" 2836d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 284c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2856d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(8, breaker.next()); // after "a" 2866d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 287c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2886d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(10, breaker.next()); // after ".b" 2896d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 290c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2916d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(11, breaker.next()); // after "/" 2926d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 293c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2946d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(13, breaker.next()); // after "~c" 2956d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 296c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 2976d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(15, breaker.next()); // after ",d" 2986d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 299c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3006d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(17, breaker.next()); // after "-e" 3016d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 302c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3036d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(19, breaker.next()); // after "?f" 3046d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 305c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3066d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(20, breaker.next()); // after "=" 3076d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 308c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3096d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(21, breaker.next()); // after "g" 3106d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 311c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3126d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(22, breaker.next()); // after "&" 3136d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 314c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3156d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(23, breaker.next()); // after "h" 3166d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 317c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3186d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(25, breaker.next()); // after "#i" 3196d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 320c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3216d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(27, breaker.next()); // after "%j" 3226d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 323c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3246d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(29, breaker.next()); // after "_k" 3256d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 326c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(1, breaker.breakBadness()); 3276d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 3286d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 329c88ef135fcc2661ec7addc171ebc60787df38affRaph Levien EXPECT_EQ(0, breaker.breakBadness()); 3306d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien} 3316d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien 3326d15657e4a3826d4d47d5358f1dde211484527e9Raph LevienTEST_F(WordBreakerTest, urlNoHyphenBreak) { 3336d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '-', '/', 'b'}; 3346d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien WordBreaker breaker; 3356d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setLocale(icu::Locale::getEnglish()); 3366d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setText(buf, NELEM(buf)); 3376d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(0, breaker.current()); 3386d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(5, breaker.next()); // after "http:" 3396d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3406d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(7, breaker.next()); // after "//" 3416d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3426d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(8, breaker.next()); // after "a" 3436d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3446d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 3456d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3466d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien} 3476d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien 3486d15657e4a3826d4d47d5358f1dde211484527e9Raph LevienTEST_F(WordBreakerTest, urlEndsWithSlash) { 3496d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '/'}; 3506d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien WordBreaker breaker; 3516d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setLocale(icu::Locale::getEnglish()); 3526d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setText(buf, NELEM(buf)); 3536d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(0, breaker.current()); 3546d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(5, breaker.next()); // after "http:" 3556d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3566d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(7, breaker.next()); // after "//" 3576d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3586d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(8, breaker.next()); // after "a" 3596d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3606d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 3616d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3626d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien} 3636d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien 3646d15657e4a3826d4d47d5358f1dde211484527e9Raph LevienTEST_F(WordBreakerTest, emailStartsWithSlash) { 3656d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien uint16_t buf[] = {'/', 'a', '@', 'b'}; 3666d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien WordBreaker breaker; 3676d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setLocale(icu::Locale::getEnglish()); 3686d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien breaker.setText(buf, NELEM(buf)); 3696d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ(0, breaker.current()); 3706d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end 3716d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); 3726d15657e4a3826d4d47d5358f1dde211484527e9Raph Levien} 373