1c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader/*
2c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * Copyright (C) 2017 The Android Open Source Project
3c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader *
4c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * Licensed under the Apache License, Version 2.0 (the "License");
5c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * you may not use this file except in compliance with the License.
6c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * You may obtain a copy of the License at
7c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader *
8c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader *      http://www.apache.org/licenses/LICENSE-2.0
9c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader *
10c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * Unless required by applicable law or agreed to in writing, software
11c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * distributed under the License is distributed on an "AS IS" BASIS,
12c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * See the License for the specific language governing permissions and
14c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader * limitations under the License.
15c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader */
16c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
17c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include <gtest/gtest.h>
18c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
19c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include "ICUTestBase.h"
20c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include <minikin/Hyphenator.h>
21c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader#include <FileUtils.h>
22c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
// Number of elements in a built-in array. Only meaningful for true arrays, not pointers.
#if !defined(NELEM)
#define NELEM(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
26c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
27c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournadernamespace minikin {
28c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
29c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst char* usHyph = "/system/usr/hyphen-data/hyph-en-us.hyb";
30c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst char* malayalamHyph = "/system/usr/hyphen-data/hyph-ml.hyb";
31c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
32c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournadertypedef ICUTestBase HyphenatorTest;
33c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
34c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst icu::Locale catalanLocale("ca", "ES", nullptr, nullptr);
35c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst icu::Locale polishLocale("pl", "PL", nullptr, nullptr);
36c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst icu::Locale& usLocale = icu::Locale::getUS();
37c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
38c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t HYPHEN_MINUS = 0x002D;
39c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t SOFT_HYPHEN = 0x00AD;
40c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t MIDDLE_DOT = 0x00B7;
41c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t GREEK_LOWER_ALPHA = 0x03B1;
42c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t ARMENIAN_AYB = 0x0531;
43c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t HEBREW_ALEF = 0x05D0;
44c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t ARABIC_ALEF = 0x0627;
45c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t ARABIC_BEH = 0x0628;
46c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t ARABIC_ZWARAKAY = 0x0659;
47c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t MALAYALAM_KA = 0x0D15;
48c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t UCAS_E = 0x1401;
49c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t HYPHEN = 0x2010;
50c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournaderconst uint16_t EN_DASH = 0x2013;
51c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
52c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Simple test for US English. This tests "table", which happens to be the in the exceptions list.
53c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, usEnglishAutomaticHyphenation) {
54d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(readWholeFile(usHyph).data(), 2, 3);
55c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'t', 'a', 'b', 'l', 'e'};
56c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
57c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
58c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
59c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
60c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
61c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
62c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]);
63c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
64c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
65c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
66c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Catalan l·l should break as l-/l
67c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, catalanMiddleDot) {
68d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
69d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    const uint16_t word[] = {'l', 'l', MIDDLE_DOT, 'l', 'l'};
70c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
71c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale);
72d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
73c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
74c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
75c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
76c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN, result[3]);
77c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
78c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
79c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
80c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Catalan l·l should not break if the word is too short.
81c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, catalanMiddleDotShortWord) {
82d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
83c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'l', MIDDLE_DOT, 'l'};
84c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
85c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale);
86c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
87c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
88c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
89c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]);
90c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
91c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
92c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// If we break on a hyphen in Polish, the hyphen should be repeated on the next line.
93c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, polishHyphen) {
94d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
95c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', HYPHEN, 'y'};
96c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
97c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
98c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
99c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
100c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
101c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]);
102c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
103c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
104c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// If the language is Polish but the script is not Latin, don't use Polish rules for hyphenation.
105c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, polishHyphenButNonLatinWord) {
106d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
107c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {GREEK_LOWER_ALPHA, HYPHEN, GREEK_LOWER_ALPHA};
108c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
109c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
110c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
111c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
112c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
113c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
114c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
115c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
116c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Polish en dash doesn't repeat on next line (as far as we know), but just provides a break
117c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// opportunity.
118c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, polishEnDash) {
119d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
120c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', EN_DASH, 'y'};
121c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
122c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), polishLocale);
123c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
124c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
125c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
126c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
127c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
128c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
129c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Latin script text, soft hyphens should insert a visible hyphen if broken at.
130c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, latinSoftHyphen) {
131d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
132c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {'x', SOFT_HYPHEN, 'y'};
133c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
134c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
135c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
136c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
137c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
138c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
139c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
140c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
141c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Soft hyphens at the beginning of a word are not useful in linebreaking.
142c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, latinSoftHyphenStartingTheWord) {
143d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
144c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {SOFT_HYPHEN, 'y'};
145c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
146c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
147c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 2, result.size());
148c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
149c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
150c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
151c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
152c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Malayalam script text, soft hyphens should not insert a visible hyphen if broken at.
153c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, malayalamSoftHyphen) {
154d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
155c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {MALAYALAM_KA, SOFT_HYPHEN, MALAYALAM_KA};
156c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
157c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
158c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
159c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
160c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
161c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
162c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
163c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
164c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In automatically hyphenated Malayalam script text, we should not insert a visible hyphen.
165c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, malayalamAutomaticHyphenation) {
166d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(readWholeFile(malayalamHyph).data(), 2, 2);
167c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {
168c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader            MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA};
169c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
170c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
171c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 5, result.size());
172c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
173c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
174c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]);
175d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[3]);
176c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]);
177c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
178c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
179c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Armenian script text, soft hyphens should insert an Armenian hyphen if broken at.
180c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, aremenianSoftHyphen) {
181d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
182c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {ARMENIAN_AYB, SOFT_HYPHEN, ARMENIAN_AYB};
183c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
184c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
185c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
186c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
187c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
188c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN, result[2]);
189c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
190c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
191c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// In Hebrew script text, soft hyphens should insert a normal hyphen if broken at, for now.
192c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// We may need to change this to maqaf later.
193c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh PournaderTEST_F(HyphenatorTest, hebrewSoftHyphen) {
194d78f260a988024b878909555edbfcd7159e7ad2fRoozbeh Pournader    Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2);
195c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    const uint16_t word[] = {HEBREW_ALEF, SOFT_HYPHEN, HEBREW_ALEF};
196c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    std::vector<HyphenationType> result;
197c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    hyphenator->hyphenate(&result, word, NELEM(word), usLocale);
198c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ((size_t) 3, result.size());
199c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]);
200c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]);
201c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader    EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]);
202c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader}
203c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader
204c7ef4000c1e840c3d3b66e85a40ebd34a5a2a8eeRoozbeh Pournader// Soft hyphen between two Arabic letters that join should keep the joining
205