1d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien/*
2d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * Copyright (C) 2015 The Android Open Source Project
3d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien *
4d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * Licensed under the Apache License, Version 2.0 (the "License");
5d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * you may not use this file except in compliance with the License.
6d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * You may obtain a copy of the License at
7d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien *
8d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien *      http://www.apache.org/licenses/LICENSE-2.0
9d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien *
10d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * Unless required by applicable law or agreed to in writing, software
11d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * distributed under the License is distributed on an "AS IS" BASIS,
12d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * See the License for the specific language governing permissions and
14d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien * limitations under the License.
15d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien */
16d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
17d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien#include <gtest/gtest.h>
18d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien#include <UnicodeUtils.h>
19d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien#include <minikin/GraphemeBreak.h>
20d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
21d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levienusing namespace android;
22d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
23d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levienbool IsBreak(const char* src) {
24d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    const size_t BUF_SIZE = 256;
25d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    uint16_t buf[BUF_SIZE];
26d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    size_t offset;
27d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    size_t size;
28d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
29d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    return GraphemeBreak::isGraphemeBreak(buf, 0, size, offset);
30d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien}
31d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
32d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph LevienTEST(GraphemeBreak, utf16) {
33d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+D83C | U+DC31"));  // emoji, U+1F431
34d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
35d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // tests for invalid UTF-16
36d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+D800 | U+D800"));  // two leading surrogates
37d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+DC00 | U+DC00"));  // two trailing surrogates
38d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | U+D800"));  // lonely leading surrogate
39d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+DC00 | 'a'"));  // lonely trailing surrogate
40d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+D800 | 'a'"));  // leading surrogate followed by non-surrogate
41d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | U+DC00"));  // non-surrogate followed by trailing surrogate
42d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien}
43d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
44d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph LevienTEST(GraphemeBreak, rules) {
45d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB1, sot ÷; Rule GB2, ÷ eot
46d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("| 'a'"));
47d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' |"));
48d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
49d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB3, CR x LF
50d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+000D | U+000A"));  // CR x LF
51d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
52d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB4, (Control | CR | LF) ÷
53d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | U+2028"));  // Line separator
54d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | U+000D"));  // LF
55d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | U+000A"));  // CR
56d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
57d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB5, ÷ (Control | CR | LF)
58d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+2028 | 'a'"));  // Line separator
59d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+000D | 'a'"));  // LF
60d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+000A | 'a'"));  // CR
61d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
62d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB6, L x ( L | V | LV | LVT )
63d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1100 | U+1100"));  // L x L
64d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1100 | U+1161"));  // L x V
65d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1100 | U+AC00"));  // L x LV
66d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1100 | U+AC01"));  // L x LVT
67d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
68d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB7, ( LV | V ) x ( V | T )
69d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+AC00 | U+1161"));  // LV x V
70d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1161 | U+1161"));  // V x V
71d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+AC00 | U+11A8"));  // LV x T
72d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1161 | U+11A8"));  // V x T
73d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
74d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB8, ( LVT | T ) x T
75d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+AC01 | U+11A8"));  // LVT x T
76d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+11A8 | U+11A8"));  // T x T
77d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
78d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Other hangul pairs not counted above _are_ breaks (GB10)
79d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+AC00 | U+1100"));  // LV x L
80d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+AC01 | U+1100"));  // LVT x L
81d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+11A8 | U+1100"));  // T x L
82d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+11A8 | U+AC00"));  // T x LV
83d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+11A8 | U+AC01"));  // T x LVT
84d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
85d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB8a, Regional_Indicator x Regional_Indicator
86d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
87450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
88450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag)
89450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag)
90450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka
91450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA"));  // Regional indicator pair (flag)
92450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
93450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka
94450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA"));  // Regional indicator pair (flag)
95450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
96450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka
97450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_TRUE(
98450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka            IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
99450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_FALSE(
100450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka            IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
101450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    EXPECT_FALSE(
102450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka            IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
103d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
104d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB9, x Extend
105d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("'a' | U+0301"));  // combining accent
106d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB9a, x SpacingMark
107d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+0915 | U+093E"));  // KA, AA (spacing mark)
108d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB9b, Prepend x
109d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // see tailoring test for prepend, as current ICU doesn't have any characters in the class
110d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
111d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB10, Any ÷ Any
112d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | 'b'"));
113d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'f' | 'i'"));  // probable ligature
114d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+0644 | U+0627"));  // probable ligature, lam + alef
115d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+4E00 | U+4E00"));  // CJK ideographs
116d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("'a' | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
117d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | 'a'"));  // Regional indicator pair (flag)
118d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien}
119d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
120d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph LevienTEST(GraphemeBreak, tailoring) {
121d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // control characters that we interpret as "extend"
122d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("'a' | U+00AD"));  // soft hyphen
123d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("'a' | U+200B"));  // zwsp
124d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("'a' | U+200E"));  // lrm
125d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("'a' | U+202A"));  // lre
126d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("'a' | U+E0041"));  // tag character
127d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
128d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // UTC-approved characters for the Prepend class
129d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+06DD | U+0661"));  // arabic subtending mark + digit one
130d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
131d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+0E01 | U+0E33"));  // Thai sara am
132d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
133d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // virama is not a grapheme break, but "pure killer" is
134d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
135d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
136d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(IsBreak("U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
137d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
1386638e05ac2de397455c30cae05aca399a567428dRaph Levien
1396638e05ac2de397455c30cae05aca399a567428dRaph Levien    // suppress grapheme breaks in zwj emoji sequences, see
1406638e05ac2de397455c30cae05aca399a567428dRaph Levien    // http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
1416638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468"));
1426638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468"));
1436638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468"));
1446638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466"));
1456638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466"));
1466638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466"));
1476638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466"));
1486638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466"));
1496638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8"));
1506638e05ac2de397455c30cae05aca399a567428dRaph Levien
15177f488345316fba46c271fc04bea470819ae1712Seigo Nonaka    // Do not break before and after zwj with all kind of emoji characters.
15277f488345316fba46c271fc04bea470819ae1712Seigo Nonaka    EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464"));
15377f488345316fba46c271fc04bea470819ae1712Seigo Nonaka    EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464"));
15477f488345316fba46c271fc04bea470819ae1712Seigo Nonaka
1556638e05ac2de397455c30cae05aca399a567428dRaph Levien    // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break
1566638e05ac2de397455c30cae05aca399a567428dRaph Levien    EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764"));
157d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien}
158d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien
159adfa580f1f067c846509b4346e5be2cb19177c1bRaph LevienTEST(GraphemeBreak, emojiModifiers) {
160adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+261D | U+1F3FB"));  // white up pointing index + modifier
161adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+270C | U+1F3FB"));  // victory hand + modifier
162adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB"));  // boy + modifier
163adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC"));  // boy + modifier
164adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD"));  // boy + modifier
165adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE"));  // boy + modifier
166adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF"));  // boy + modifier
167adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF"));  // sign of the horns + modifier
168adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF"));  // selfie (Unicode 9) + modifier
169adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien
170adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    // adding emoji style variation selector doesn't affect grapheme cluster
171adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_TRUE(IsBreak("U+270C U+FE0E | U+1F3FB"));  // victory hand + text style + modifier
172adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
173adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien
174adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    // heart is not an emoji base
175adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB"));  // heart + modifier
176adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_TRUE(IsBreak("U+2764 U+FE0E | U+1F3FB"));  // heart + emoji style + modifier
177adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_TRUE(IsBreak("U+2764 U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
1781934c2c3cb2c93aa12f852f95915190f8ac81facRaph Levien    EXPECT_TRUE(IsBreak("U+1F3FB | U+1F3FB"));  // modifier + modifier
179adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien
180adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    // rat is not an emoji modifer
181adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    EXPECT_TRUE(IsBreak("U+1F466 | U+1F400"));  // boy + rat
182adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien}
183adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien
184d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph LevienTEST(GraphemeBreak, offsets) {
185d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    uint16_t string[] = { 0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301 };
186d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 2));
187d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_FALSE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 3));
188d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 4));
189d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(string, 2, 3, 5));
190d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien}
191