HyphenatorTest.cpp revision d78f260a988024b878909555edbfcd7159e7ad2f
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
17c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include <gtest/gtest.h>
18c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
19c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include "ICUTestBase.h"
20c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include <minikin/Hyphenator.h>
21c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include <FileUtils.h>
22c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
// Element count of a built-in array. The argument must be a real array, not a pointer, because
// the count is derived from sizeof. Guarded so that an already-visible NELEM definition wins.
#ifndef NELEM
#define NELEM(array) (sizeof(array) / sizeof((array)[0]))
#endif
26c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
27c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournadernamespace minikin {
28c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
29c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst char* usHyph = "/system/usr/hyphen-data/hyph-en-us.hyb";
30c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst char* malayalamHyph = "/system/usr/hyphen-data/hyph-ml.hyb";
31c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
32c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournadertypedef ICUTestBase HyphenatorTest;
33c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
34c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst icu::Locale catalanLocale("ca", "ES", nullptr, nullptr);
35c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst icu::Locale polishLocale("pl", "PL", nullptr, nullptr);
36c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst icu::Locale& usLocale = icu::Locale::getUS();
37c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
// UTF-16 code units used to build the test words, listed in code point order.
constexpr uint16_t HYPHEN_MINUS = 0x002D;       // U+002D HYPHEN-MINUS
constexpr uint16_t SOFT_HYPHEN = 0x00AD;        // U+00AD SOFT HYPHEN
constexpr uint16_t MIDDLE_DOT = 0x00B7;         // U+00B7 MIDDLE DOT
constexpr uint16_t GREEK_LOWER_ALPHA = 0x03B1;  // U+03B1 GREEK SMALL LETTER ALPHA
constexpr uint16_t ARMENIAN_AYB = 0x0531;       // U+0531 ARMENIAN CAPITAL LETTER AYB
constexpr uint16_t HEBREW_ALEF = 0x05D0;        // U+05D0 HEBREW LETTER ALEF
constexpr uint16_t ARABIC_ALEF = 0x0627;        // U+0627 ARABIC LETTER ALEF
constexpr uint16_t ARABIC_BEH = 0x0628;         // U+0628 ARABIC LETTER BEH
constexpr uint16_t ARABIC_ZWARAKAY = 0x0659;    // U+0659 ARABIC ZWARAKAY
constexpr uint16_t MALAYALAM_KA = 0x0D15;       // U+0D15 MALAYALAM LETTER KA
constexpr uint16_t UCAS_E = 0x1401;             // U+1401 CANADIAN SYLLABICS E
constexpr uint16_t HYPHEN = 0x2010;             // U+2010 HYPHEN
constexpr uint16_t EN_DASH = 0x2013;            // U+2013 EN DASH
51c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
52c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Simple test for US English. This tests "table", which happens to be the in the exceptions list.
53c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, usEnglishAutomaticHyphenation) {
54d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(readWholeFile(usHyph).data(), 2, 3);
55c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'t', 'a', 'b', 'l', 'e'};
56c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
57c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
58c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
59c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
60c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
61c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
62c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
63c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
64c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
65c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
66c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Catalan l·l should break as l-/l
67c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, catalanMiddleDot) {
68d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
69d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    const uint16_t word[] = {'l', 'l', MIDDLE_DOT, 'l', 'l'};
70c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
71c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale);
72d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
73c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
74c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
75c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
76c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN, result[3]);
77c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
78c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
79c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
80c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Catalan l·l should not break if the word is too short.
81c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, catalanMiddleDotShortWord) {
82d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
83c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'l', MIDDLE_DOT, 'l'};
84c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
85c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale);
86c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
87c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
88c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
89c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
90c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
91c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
92c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// If we break on a hyphen in Polish, the hyphen should be repeated on the next line.
93c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, polishHyphen) {
94d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
95c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', HYPHEN, 'y'};
96c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
97c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
98c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
99c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
100c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
101c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]);
102c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
103c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
104c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// If the language is Polish but the script is not Latin, don't use Polish rules for hyphenation.
105c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, polishHyphenButNonLatinWord) {
106d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
107c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {GREEK_LOWER_ALPHA, HYPHEN, GREEK_LOWER_ALPHA};
108c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
109c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
110c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
111c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
112c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
113c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
114c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
115c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
116c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Polish en dash doesn't repeat on next line (as far as we know), but just provides a break
117c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// opportunity.
118c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, polishEnDash) {
119d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
120c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', EN_DASH, 'y'};
121c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
122c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
123c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
124c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
125c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
126c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
127c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
128c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
129c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Latin script text, soft hyphens should insert a visible hyphen if broken at.
130c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, latinSoftHyphen) {
131d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
132c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', SOFT_HYPHEN, 'y'};
133c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
134c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
135c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
136c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
137c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
138c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
139c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
140c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
141c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Soft hyphens at the beginning of a word are not useful in linebreaking.
142c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, latinSoftHyphenStartingTheWord) {
143d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
144c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {SOFT_HYPHEN, 'y'};
145c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
146c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
147c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 2, result.size());
148c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
149c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
150c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
151c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
152c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Malayalam script text, soft hyphens should not insert a visible hyphen if broken at.
153c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, malayalamSoftHyphen) {
154d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
155c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {MALAYALAM_KA, SOFT_HYPHEN, MALAYALAM_KA};
156c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
157c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
158c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
159c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
160c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
161c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
162c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
163c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
164c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In automatically hyphenated Malayalam script text, we should not insert a visible hyphen.
165c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, malayalamAutomaticHyphenation) {
166d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(readWholeFile(malayalamHyph).data(), 2, 2);
167c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {
168c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader            MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA};
169c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
170c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
171c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
172c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
173c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
174c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
175d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[3]);
176c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
177c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
178c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
179c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Armenian script text, soft hyphens should insert an Armenian hyphen if broken at.
180c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, aremenianSoftHyphen) {
181d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
182c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARMENIAN_AYB, SOFT_HYPHEN, ARMENIAN_AYB};
183c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
184c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
185c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
186c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
187c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
188c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN, result[2]);
189c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
190c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
191c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Hebrew script text, soft hyphens should insert a normal hyphen if broken at, for now.
192c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// We may need to change this to maqaf later.
193c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, hebrewSoftHyphen) {
194d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
195c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {HEBREW_ALEF, SOFT_HYPHEN, HEBREW_ALEF};
196c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
197c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
198c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
199c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
200c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
201c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
202c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
203c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
204c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Soft hyphen between two Arabic letters that join should keep the joining
205c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// behavior when broken across lines.
206c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, arabicSoftHyphenConnecting) {
207d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
208c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARABIC_BEH, SOFT_HYPHEN, ARABIC_BEH};
209c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
210c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
211c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
212c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
213c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
214c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[2]);
215c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
216c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
217c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Arabic letters may be joining on one side, but if it's the wrong side, we
218c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// should use the normal hyphen.
219c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, arabicSoftHyphenNonConnecting) {
220d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
221c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARABIC_ALEF, SOFT_HYPHEN, ARABIC_BEH};
222c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
223c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
224c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
225c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
226c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
227c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
228c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
229c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
230c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Skip transparent characters until you find a non-transparent one.
231c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, arabicSoftHyphenSkipTransparents) {
232d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
233c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH};
234c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
235c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
236c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
237c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
238c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
239c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
240c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[3]);
241c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
242c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
243c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
244c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Skip transparent characters until you find a non-transparent one. If we get to one end without
245c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// finding anything, we are still non-joining.
246c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, arabicSoftHyphenTransparentsAtEnd) {
247d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
248c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY};
249c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
250c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
251c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 4, result.size());
252c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
253c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
254c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
255c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[3]);
256c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
257c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
258c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Skip transparent characters until you find a non-transparent one. If we get to one end without
259c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// finding anything, we are still non-joining.
260c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, arabicSoftHyphenTransparentsAtStart) {
261d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
262c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH};
263c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
264c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
265c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 4, result.size());
266c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
267c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
268c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
269c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
270c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
271c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
272c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Unified Canadian Aboriginal script (UCAS) text, soft hyphens should insert a UCAS hyphen.
273c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, ucasSoftHyphen) {
274d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
275c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {UCAS_E, SOFT_HYPHEN, UCAS_E};
276c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
277c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
278c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
279c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
280c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
281c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]);
282c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
283c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
284c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Presently, soft hyphen looks at the character after it to determine hyphenation type. This is a
285c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// little arbitrary, but let's test it anyway.
286c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, mixedScriptSoftHyphen) {
287d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
288c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'a', SOFT_HYPHEN, UCAS_E};
289c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
290c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
291c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
292c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
293c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
294c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]);
295c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
296c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
297c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Hard hyphens provide a breaking opportunity with nothing extra inserted.
298c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, hardHyphen) {
299d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
300c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', HYPHEN, 'y'};
301c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
302c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
303c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
304c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
305c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
306c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
307c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
308c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
309c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Hyphen-minuses also provide a breaking opportunity with nothing extra inserted.
310c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, hyphenMinus) {
311d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
312c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', HYPHEN_MINUS, 'y'};
313c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
314c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
315c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
316c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
317c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
318c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
319c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
320c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
321c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// If the word starts with a hard hyphen or hyphen-minus, it doesn't make sense to break
322c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// it at that point.
323c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, startingHyphenMinus) {
324d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
325c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {HYPHEN_MINUS, 'y'};
326c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
327c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
328c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 2, result.size());
329c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
330c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
331c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
332c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
333c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}  // namespace minikin
334c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
335