1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <gtest/gtest.h>
18
19#include "FontLanguageListCache.h"
20#include "FontLanguage.h"
21#include "FontTestUtils.h"
22#include "ICUTestBase.h"
23#include "MinikinFontForTest.h"
24#include "MinikinInternal.h"
25#include "UnicodeUtils.h"
26#include "minikin/FontFamily.h"
27
28using android::AutoMutex;
29using android::FontCollection;
30using android::FontFamily;
31using android::FontLanguage;
32using android::FontLanguages;
33using android::FontLanguageListCache;
34using android::FontStyle;
35using android::MinikinAutoUnref;
36using android::MinikinFont;
37using android::gMinikinLock;
38
39const char kItemizeFontXml[] = kTestFontDir "itemize.xml";
40const char kEmojiFont[] = kTestFontDir "Emoji.ttf";
41const char kJAFont[] = kTestFontDir "Ja.ttf";
42const char kKOFont[] = kTestFontDir "Ko.ttf";
43const char kLatinBoldFont[] = kTestFontDir "Bold.ttf";
44const char kLatinBoldItalicFont[] = kTestFontDir "BoldItalic.ttf";
45const char kLatinFont[] = kTestFontDir "Regular.ttf";
46const char kLatinItalicFont[] = kTestFontDir "Italic.ttf";
47const char kZH_HansFont[] = kTestFontDir "ZhHans.ttf";
48const char kZH_HantFont[] = kTestFontDir "ZhHant.ttf";
49
50const char kEmojiXmlFile[] = kTestFontDir "emoji.xml";
51const char kNoGlyphFont[] =  kTestFontDir "NoGlyphFont.ttf";
52const char kColorEmojiFont[] = kTestFontDir "ColorEmojiFont.ttf";
53const char kTextEmojiFont[] = kTestFontDir "TextEmojiFont.ttf";
54const char kMixedEmojiFont[] = kTestFontDir "ColorTextMixedEmojiFont.ttf";
55
56typedef ICUTestBase FontCollectionItemizeTest;
57
58// Utility function for calling itemize function.
59void itemize(FontCollection* collection, const char* str, FontStyle style,
60        std::vector<FontCollection::Run>* result) {
61    const size_t BUF_SIZE = 256;
62    uint16_t buf[BUF_SIZE];
63    size_t len;
64
65    result->clear();
66    ParseUnicode(buf, BUF_SIZE, str, &len, NULL);
67    AutoMutex _l(gMinikinLock);
68    collection->itemize(buf, len, style, result);
69}
70
71// Utility function to obtain font path associated with run.
72const std::string& getFontPath(const FontCollection::Run& run) {
73    EXPECT_NE(nullptr, run.fakedFont.font);
74    return ((MinikinFontForTest*)run.fakedFont.font)->fontPath();
75}
76
77// Utility function to obtain FontLanguages from string.
78const FontLanguages& registerAndGetFontLanguages(const std::string& lang_string) {
79    AutoMutex _l(gMinikinLock);
80    return FontLanguageListCache::getById(FontLanguageListCache::getId(lang_string));
81}
82
83TEST_F(FontCollectionItemizeTest, itemize_latin) {
84    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
85    std::vector<FontCollection::Run> runs;
86
87    const FontStyle kRegularStyle = FontStyle();
88    const FontStyle kItalicStyle = FontStyle(4, true);
89    const FontStyle kBoldStyle = FontStyle(7, false);
90    const FontStyle kBoldItalicStyle = FontStyle(7, true);
91
92    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kRegularStyle, &runs);
93    ASSERT_EQ(1U, runs.size());
94    EXPECT_EQ(0, runs[0].start);
95    EXPECT_EQ(5, runs[0].end);
96    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
97    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
98    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
99
100    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kItalicStyle, &runs);
101    ASSERT_EQ(1U, runs.size());
102    EXPECT_EQ(0, runs[0].start);
103    EXPECT_EQ(5, runs[0].end);
104    EXPECT_EQ(kLatinItalicFont, getFontPath(runs[0]));
105    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
106    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
107
108    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kBoldStyle, &runs);
109    ASSERT_EQ(1U, runs.size());
110    EXPECT_EQ(0, runs[0].start);
111    EXPECT_EQ(5, runs[0].end);
112    EXPECT_EQ(kLatinBoldFont, getFontPath(runs[0]));
113    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
114    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
115
116    itemize(collection.get(), "'a' 'b' 'c' 'd' 'e'", kBoldItalicStyle, &runs);
117    ASSERT_EQ(1U, runs.size());
118    EXPECT_EQ(0, runs[0].start);
119    EXPECT_EQ(5, runs[0].end);
120    EXPECT_EQ(kLatinBoldItalicFont, getFontPath(runs[0]));
121    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
122    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
123
124    // Continue if the specific characters (e.g. hyphen, comma, etc.) is
125    // followed.
126    itemize(collection.get(), "'a' ',' '-' 'd' '!'", kRegularStyle, &runs);
127    ASSERT_EQ(1U, runs.size());
128    EXPECT_EQ(0, runs[0].start);
129    EXPECT_EQ(5, runs[0].end);
130    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
131    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
132    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
133
134    itemize(collection.get(), "'a' ',' '-' 'd' '!'", kRegularStyle, &runs);
135    ASSERT_EQ(1U, runs.size());
136    EXPECT_EQ(0, runs[0].start);
137    EXPECT_EQ(5, runs[0].end);
138    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
139    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
140    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
141
142    // U+0301(COMBINING ACUTE ACCENT) must be in the same run with preceding
143    // chars if the font supports it.
144    itemize(collection.get(), "'a' U+0301", kRegularStyle, &runs);
145    ASSERT_EQ(1U, runs.size());
146    EXPECT_EQ(0, runs[0].start);
147    EXPECT_EQ(2, runs[0].end);
148    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
149    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
150    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
151}
152
153TEST_F(FontCollectionItemizeTest, itemize_emoji) {
154    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
155    std::vector<FontCollection::Run> runs;
156
157    itemize(collection.get(), "U+1F469 U+1F467", FontStyle(), &runs);
158    ASSERT_EQ(1U, runs.size());
159    EXPECT_EQ(0, runs[0].start);
160    EXPECT_EQ(4, runs[0].end);
161    EXPECT_EQ(kEmojiFont, getFontPath(runs[0]));
162    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
163    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
164
165    // U+20E3(COMBINING ENCLOSING KEYCAP) must be in the same run with preceding
166    // character if the font supports.
167    itemize(collection.get(), "'0' U+20E3", FontStyle(), &runs);
168    ASSERT_EQ(1U, runs.size());
169    EXPECT_EQ(0, runs[0].start);
170    EXPECT_EQ(2, runs[0].end);
171    EXPECT_EQ(kEmojiFont, getFontPath(runs[0]));
172    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
173    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
174
175    itemize(collection.get(), "U+1F470 U+20E3", FontStyle(), &runs);
176    ASSERT_EQ(1U, runs.size());
177    EXPECT_EQ(0, runs[0].start);
178    EXPECT_EQ(3, runs[0].end);
179    EXPECT_EQ(kEmojiFont, getFontPath(runs[0]));
180    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
181    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
182
183    itemize(collection.get(), "U+242EE U+1F470 U+20E3", FontStyle(), &runs);
184    ASSERT_EQ(2U, runs.size());
185    EXPECT_EQ(0, runs[0].start);
186    EXPECT_EQ(2, runs[0].end);
187    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
188    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
189    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
190
191    EXPECT_EQ(2, runs[1].start);
192    EXPECT_EQ(5, runs[1].end);
193    EXPECT_EQ(kEmojiFont, getFontPath(runs[1]));
194    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold());
195    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic());
196
197    // Currently there is no fonts which has a glyph for 'a' + U+20E3, so they
198    // are splitted into two.
199    itemize(collection.get(), "'a' U+20E3", FontStyle(), &runs);
200    ASSERT_EQ(2U, runs.size());
201    EXPECT_EQ(0, runs[0].start);
202    EXPECT_EQ(1, runs[0].end);
203    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
204    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
205    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
206
207    EXPECT_EQ(1, runs[1].start);
208    EXPECT_EQ(2, runs[1].end);
209    EXPECT_EQ(kEmojiFont, getFontPath(runs[1]));
210    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold());
211    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic());
212}
213
214TEST_F(FontCollectionItemizeTest, itemize_non_latin) {
215    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
216    std::vector<FontCollection::Run> runs;
217
218    FontStyle kJAStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"));
219    FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US"));
220    FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans"));
221
222    // All Japanese Hiragana characters.
223    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kUSStyle, &runs);
224    ASSERT_EQ(1U, runs.size());
225    EXPECT_EQ(0, runs[0].start);
226    EXPECT_EQ(5, runs[0].end);
227    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
228    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
229    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
230
231    // All Korean Hangul characters.
232    itemize(collection.get(), "U+B300 U+D55C U+BBFC U+AD6D", kUSStyle, &runs);
233    ASSERT_EQ(1U, runs.size());
234    EXPECT_EQ(0, runs[0].start);
235    EXPECT_EQ(4, runs[0].end);
236    EXPECT_EQ(kKOFont, getFontPath(runs[0]));
237    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
238    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
239
240    // All Han characters ja, zh-Hans font having.
241    // Japanese font should be selected if the specified language is Japanese.
242    itemize(collection.get(), "U+81ED U+82B1 U+5FCD", kJAStyle, &runs);
243    ASSERT_EQ(1U, runs.size());
244    EXPECT_EQ(0, runs[0].start);
245    EXPECT_EQ(3, runs[0].end);
246    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
247    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
248    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
249
250    // Simplified Chinese font should be selected if the specified language is Simplified
251    // Chinese.
252    itemize(collection.get(), "U+81ED U+82B1 U+5FCD", kZH_HansStyle, &runs);
253    ASSERT_EQ(1U, runs.size());
254    EXPECT_EQ(0, runs[0].start);
255    EXPECT_EQ(3, runs[0].end);
256    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
257    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
258    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
259
260    // Fallbacks to other fonts if there is no glyph in the specified language's
261    // font. There is no character U+4F60 in Japanese.
262    itemize(collection.get(), "U+81ED U+4F60 U+5FCD", kJAStyle, &runs);
263    ASSERT_EQ(3U, runs.size());
264    EXPECT_EQ(0, runs[0].start);
265    EXPECT_EQ(1, runs[0].end);
266    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
267    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
268    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
269
270    EXPECT_EQ(1, runs[1].start);
271    EXPECT_EQ(2, runs[1].end);
272    EXPECT_EQ(kZH_HansFont, getFontPath(runs[1]));
273    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold());
274    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic());
275
276    EXPECT_EQ(2, runs[2].start);
277    EXPECT_EQ(3, runs[2].end);
278    EXPECT_EQ(kJAFont, getFontPath(runs[2]));
279    EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeBold());
280    EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic());
281
282    // Tone mark.
283    itemize(collection.get(), "U+4444 U+302D", FontStyle(), &runs);
284    ASSERT_EQ(1U, runs.size());
285    EXPECT_EQ(0, runs[0].start);
286    EXPECT_EQ(2, runs[0].end);
287    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
288    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
289    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
290
291    // Both zh-Hant and ja fonts support U+242EE, but zh-Hans doesn't.
292    // Here, ja and zh-Hant font should have the same score but ja should be selected since it is
293    // listed before zh-Hant.
294    itemize(collection.get(), "U+242EE", kZH_HansStyle, &runs);
295    ASSERT_EQ(1U, runs.size());
296    EXPECT_EQ(0, runs[0].start);
297    EXPECT_EQ(2, runs[0].end);
298    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
299    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
300    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
301}
302
303TEST_F(FontCollectionItemizeTest, itemize_mixed) {
304    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
305    std::vector<FontCollection::Run> runs;
306
307    FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US"));
308
309    itemize(collection.get(), "'a' U+4F60 'b' U+4F60 'c'", kUSStyle, &runs);
310    ASSERT_EQ(5U, runs.size());
311    EXPECT_EQ(0, runs[0].start);
312    EXPECT_EQ(1, runs[0].end);
313    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
314    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
315    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
316
317    EXPECT_EQ(1, runs[1].start);
318    EXPECT_EQ(2, runs[1].end);
319    EXPECT_EQ(kZH_HansFont, getFontPath(runs[1]));
320    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold());
321    EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic());
322
323    EXPECT_EQ(2, runs[2].start);
324    EXPECT_EQ(3, runs[2].end);
325    EXPECT_EQ(kLatinFont, getFontPath(runs[2]));
326    EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeBold());
327    EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic());
328
329    EXPECT_EQ(3, runs[3].start);
330    EXPECT_EQ(4, runs[3].end);
331    EXPECT_EQ(kZH_HansFont, getFontPath(runs[3]));
332    EXPECT_FALSE(runs[3].fakedFont.fakery.isFakeBold());
333    EXPECT_FALSE(runs[3].fakedFont.fakery.isFakeItalic());
334
335    EXPECT_EQ(4, runs[4].start);
336    EXPECT_EQ(5, runs[4].end);
337    EXPECT_EQ(kLatinFont, getFontPath(runs[4]));
338    EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeBold());
339    EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeItalic());
340}
341
342TEST_F(FontCollectionItemizeTest, itemize_variationSelector) {
343    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
344    std::vector<FontCollection::Run> runs;
345
346    // A glyph for U+4FAE is provided by both Japanese font and Simplified
347    // Chinese font. Also a glyph for U+242EE is provided by both Japanese and
348    // Traditional Chinese font.  To avoid effects of device default locale,
349    // explicitly specify the locale.
350    FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans"));
351    FontStyle kZH_HantStyle = FontStyle(FontStyle::registerLanguageList("zh_Hant"));
352
353    // U+4FAE is available in both zh_Hans and ja font, but U+4FAE,U+FE00 is
354    // only available in ja font.
355    itemize(collection.get(), "U+4FAE", kZH_HansStyle, &runs);
356    ASSERT_EQ(1U, runs.size());
357    EXPECT_EQ(0, runs[0].start);
358    EXPECT_EQ(1, runs[0].end);
359    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
360
361    itemize(collection.get(), "U+4FAE U+FE00", kZH_HansStyle, &runs);
362    ASSERT_EQ(1U, runs.size());
363    EXPECT_EQ(0, runs[0].start);
364    EXPECT_EQ(2, runs[0].end);
365    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
366
367    itemize(collection.get(), "U+4FAE U+4FAE U+FE00", kZH_HansStyle, &runs);
368    ASSERT_EQ(2U, runs.size());
369    EXPECT_EQ(0, runs[0].start);
370    EXPECT_EQ(1, runs[0].end);
371    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
372    EXPECT_EQ(1, runs[1].start);
373    EXPECT_EQ(3, runs[1].end);
374    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
375
376    itemize(collection.get(), "U+4FAE U+4FAE U+FE00 U+4FAE", kZH_HansStyle, &runs);
377    ASSERT_EQ(3U, runs.size());
378    EXPECT_EQ(0, runs[0].start);
379    EXPECT_EQ(1, runs[0].end);
380    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
381    EXPECT_EQ(1, runs[1].start);
382    EXPECT_EQ(3, runs[1].end);
383    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
384    EXPECT_EQ(3, runs[2].start);
385    EXPECT_EQ(4, runs[2].end);
386    EXPECT_EQ(kZH_HansFont, getFontPath(runs[2]));
387
388    // Validation selector after validation selector.
389    itemize(collection.get(), "U+4FAE U+FE00 U+FE00", kZH_HansStyle, &runs);
390    ASSERT_EQ(1U, runs.size());
391    EXPECT_EQ(0, runs[0].start);
392    EXPECT_EQ(3, runs[0].end);
393    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
394
395    // No font supports U+242EE U+FE0E.
396    itemize(collection.get(), "U+4FAE U+FE0E", kZH_HansStyle, &runs);
397    ASSERT_EQ(1U, runs.size());
398    EXPECT_EQ(0, runs[0].start);
399    EXPECT_EQ(2, runs[0].end);
400    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
401
402    // Surrogate pairs handling.
403    // U+242EE is available in ja font and zh_Hant font.
404    // U+242EE U+FE00 is available only in ja font.
405    itemize(collection.get(), "U+242EE", kZH_HantStyle, &runs);
406    ASSERT_EQ(1U, runs.size());
407    EXPECT_EQ(0, runs[0].start);
408    EXPECT_EQ(2, runs[0].end);
409    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
410
411    itemize(collection.get(), "U+242EE U+FE00", kZH_HantStyle, &runs);
412    ASSERT_EQ(1U, runs.size());
413    EXPECT_EQ(0, runs[0].start);
414    EXPECT_EQ(3, runs[0].end);
415    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
416
417    itemize(collection.get(), "U+242EE U+242EE U+FE00", kZH_HantStyle, &runs);
418    ASSERT_EQ(2U, runs.size());
419    EXPECT_EQ(0, runs[0].start);
420    EXPECT_EQ(2, runs[0].end);
421    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
422    EXPECT_EQ(2, runs[1].start);
423    EXPECT_EQ(5, runs[1].end);
424    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
425
426    itemize(collection.get(), "U+242EE U+242EE U+FE00 U+242EE", kZH_HantStyle, &runs);
427    ASSERT_EQ(3U, runs.size());
428    EXPECT_EQ(0, runs[0].start);
429    EXPECT_EQ(2, runs[0].end);
430    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
431    EXPECT_EQ(2, runs[1].start);
432    EXPECT_EQ(5, runs[1].end);
433    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
434    EXPECT_EQ(5, runs[2].start);
435    EXPECT_EQ(7, runs[2].end);
436    EXPECT_EQ(kZH_HantFont, getFontPath(runs[2]));
437
438    // Validation selector after validation selector.
439    itemize(collection.get(), "U+242EE U+FE00 U+FE00", kZH_HansStyle, &runs);
440    ASSERT_EQ(1U, runs.size());
441    EXPECT_EQ(0, runs[0].start);
442    EXPECT_EQ(4, runs[0].end);
443    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
444
445    // No font supports U+242EE U+FE0E
446    itemize(collection.get(), "U+242EE U+FE0E", kZH_HantStyle, &runs);
447    ASSERT_EQ(1U, runs.size());
448    EXPECT_EQ(0, runs[0].start);
449    EXPECT_EQ(3, runs[0].end);
450    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
451
452    // Isolated variation selector supplement.
453    itemize(collection.get(), "U+FE00", FontStyle(), &runs);
454    ASSERT_EQ(1U, runs.size());
455    EXPECT_EQ(0, runs[0].start);
456    EXPECT_EQ(1, runs[0].end);
457    EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0]));
458
459    itemize(collection.get(), "U+FE00", kZH_HantStyle, &runs);
460    ASSERT_EQ(1U, runs.size());
461    EXPECT_EQ(0, runs[0].start);
462    EXPECT_EQ(1, runs[0].end);
463    EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0]));
464
465    // First font family (Regular.ttf) supports U+203C but doesn't support U+203C U+FE0F.
466    // Emoji.ttf font supports U+203C U+FE0F.  Emoji.ttf should be selected.
467    itemize(collection.get(), "U+203C U+FE0F", kZH_HantStyle, &runs);
468    ASSERT_EQ(1U, runs.size());
469    EXPECT_EQ(0, runs[0].start);
470    EXPECT_EQ(2, runs[0].end);
471    EXPECT_EQ(kEmojiFont, getFontPath(runs[0]));
472
473    // First font family (Regular.ttf) supports U+203C U+FE0E.
474    itemize(collection.get(), "U+203C U+FE0E", kZH_HantStyle, &runs);
475    ASSERT_EQ(1U, runs.size());
476    EXPECT_EQ(0, runs[0].start);
477    EXPECT_EQ(2, runs[0].end);
478    EXPECT_EQ(kLatinFont, getFontPath(runs[0]));
479}
480
481TEST_F(FontCollectionItemizeTest, itemize_variationSelectorSupplement) {
482    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
483    std::vector<FontCollection::Run> runs;
484
485    // A glyph for U+845B is provided by both Japanese font and Simplified
486    // Chinese font. Also a glyph for U+242EE is provided by both Japanese and
487    // Traditional Chinese font.  To avoid effects of device default locale,
488    // explicitly specify the locale.
489    FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans"));
490    FontStyle kZH_HantStyle = FontStyle(FontStyle::registerLanguageList("zh_Hant"));
491
492    // U+845B is available in both zh_Hans and ja font, but U+845B,U+E0100 is
493    // only available in ja font.
494    itemize(collection.get(), "U+845B", kZH_HansStyle, &runs);
495    ASSERT_EQ(1U, runs.size());
496    EXPECT_EQ(0, runs[0].start);
497    EXPECT_EQ(1, runs[0].end);
498    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
499
500    itemize(collection.get(), "U+845B U+E0100", kZH_HansStyle, &runs);
501    ASSERT_EQ(1U, runs.size());
502    EXPECT_EQ(0, runs[0].start);
503    EXPECT_EQ(3, runs[0].end);
504    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
505
506    itemize(collection.get(), "U+845B U+845B U+E0100", kZH_HansStyle, &runs);
507    ASSERT_EQ(2U, runs.size());
508    EXPECT_EQ(0, runs[0].start);
509    EXPECT_EQ(1, runs[0].end);
510    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
511    EXPECT_EQ(1, runs[1].start);
512    EXPECT_EQ(4, runs[1].end);
513    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
514
515    itemize(collection.get(), "U+845B U+845B U+E0100 U+845B", kZH_HansStyle, &runs);
516    ASSERT_EQ(3U, runs.size());
517    EXPECT_EQ(0, runs[0].start);
518    EXPECT_EQ(1, runs[0].end);
519    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
520    EXPECT_EQ(1, runs[1].start);
521    EXPECT_EQ(4, runs[1].end);
522    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
523    EXPECT_EQ(4, runs[2].start);
524    EXPECT_EQ(5, runs[2].end);
525    EXPECT_EQ(kZH_HansFont, getFontPath(runs[2]));
526
527    // Validation selector after validation selector.
528    itemize(collection.get(), "U+845B U+E0100 U+E0100", kZH_HansStyle, &runs);
529    ASSERT_EQ(1U, runs.size());
530    EXPECT_EQ(0, runs[0].start);
531    EXPECT_EQ(5, runs[0].end);
532    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
533
534    // No font supports U+845B U+E01E0.
535    itemize(collection.get(), "U+845B U+E01E0", kZH_HansStyle, &runs);
536    ASSERT_EQ(1U, runs.size());
537    EXPECT_EQ(0, runs[0].start);
538    EXPECT_EQ(3, runs[0].end);
539    EXPECT_EQ(kZH_HansFont, getFontPath(runs[0]));
540
541    // Isolated variation selector supplement
542    // Surrogate pairs handling.
543    // U+242EE is available in ja font and zh_Hant font.
544    // U+242EE U+E0100 is available only in ja font.
545    itemize(collection.get(), "U+242EE", kZH_HantStyle, &runs);
546    ASSERT_EQ(1U, runs.size());
547    EXPECT_EQ(0, runs[0].start);
548    EXPECT_EQ(2, runs[0].end);
549    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
550
551    itemize(collection.get(), "U+242EE U+E0101", kZH_HantStyle, &runs);
552    ASSERT_EQ(1U, runs.size());
553    EXPECT_EQ(0, runs[0].start);
554    EXPECT_EQ(4, runs[0].end);
555    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
556
557    itemize(collection.get(), "U+242EE U+242EE U+E0101", kZH_HantStyle, &runs);
558    ASSERT_EQ(2U, runs.size());
559    EXPECT_EQ(0, runs[0].start);
560    EXPECT_EQ(2, runs[0].end);
561    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
562    EXPECT_EQ(2, runs[1].start);
563    EXPECT_EQ(6, runs[1].end);
564    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
565
566    itemize(collection.get(), "U+242EE U+242EE U+E0101 U+242EE", kZH_HantStyle, &runs);
567    ASSERT_EQ(3U, runs.size());
568    EXPECT_EQ(0, runs[0].start);
569    EXPECT_EQ(2, runs[0].end);
570    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
571    EXPECT_EQ(2, runs[1].start);
572    EXPECT_EQ(6, runs[1].end);
573    EXPECT_EQ(kJAFont, getFontPath(runs[1]));
574    EXPECT_EQ(6, runs[2].start);
575    EXPECT_EQ(8, runs[2].end);
576    EXPECT_EQ(kZH_HantFont, getFontPath(runs[2]));
577
578    // Validation selector after validation selector.
579    itemize(collection.get(), "U+242EE U+E0100 U+E0100", kZH_HantStyle, &runs);
580    ASSERT_EQ(1U, runs.size());
581    EXPECT_EQ(0, runs[0].start);
582    EXPECT_EQ(6, runs[0].end);
583    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
584
585    // No font supports U+242EE U+E01E0.
586    itemize(collection.get(), "U+242EE U+E01E0", kZH_HantStyle, &runs);
587    ASSERT_EQ(1U, runs.size());
588    EXPECT_EQ(0, runs[0].start);
589    EXPECT_EQ(4, runs[0].end);
590    EXPECT_EQ(kZH_HantFont, getFontPath(runs[0]));
591
592    // Isolated variation selector supplement.
593    itemize(collection.get(), "U+E0100", FontStyle(), &runs);
594    ASSERT_EQ(1U, runs.size());
595    EXPECT_EQ(0, runs[0].start);
596    EXPECT_EQ(2, runs[0].end);
597    EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0]));
598
599    itemize(collection.get(), "U+E0100", kZH_HantStyle, &runs);
600    ASSERT_EQ(1U, runs.size());
601    EXPECT_EQ(0, runs[0].start);
602    EXPECT_EQ(2, runs[0].end);
603    EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0]));
604}
605
606TEST_F(FontCollectionItemizeTest, itemize_no_crash) {
607    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
608    std::vector<FontCollection::Run> runs;
609
610    // Broken Surrogate pairs. Check only not crashing.
611    itemize(collection.get(), "'a' U+D83D 'a'", FontStyle(), &runs);
612    itemize(collection.get(), "'a' U+DC69 'a'", FontStyle(), &runs);
613    itemize(collection.get(), "'a' U+D83D U+D83D 'a'", FontStyle(), &runs);
614    itemize(collection.get(), "'a' U+DC69 U+DC69 'a'", FontStyle(), &runs);
615
616    // Isolated variation selector. Check only not crashing.
617    itemize(collection.get(), "U+FE00 U+FE00", FontStyle(), &runs);
618    itemize(collection.get(), "U+E0100 U+E0100", FontStyle(), &runs);
619    itemize(collection.get(), "U+FE00 U+E0100", FontStyle(), &runs);
620    itemize(collection.get(), "U+E0100 U+FE00", FontStyle(), &runs);
621
622    // Tone mark only. Check only not crashing.
623    itemize(collection.get(), "U+302D", FontStyle(), &runs);
624    itemize(collection.get(), "U+302D U+302D", FontStyle(), &runs);
625
626    // Tone mark and variation selector mixed. Check only not crashing.
627    itemize(collection.get(), "U+FE00 U+302D U+E0100", FontStyle(), &runs);
628}
629
630TEST_F(FontCollectionItemizeTest, itemize_fakery) {
631    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
632    std::vector<FontCollection::Run> runs;
633
634    FontStyle kJABoldStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, false);
635    FontStyle kJAItalicStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 5, true);
636    FontStyle kJABoldItalicStyle =
637           FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, true);
638
639    // Currently there is no italic or bold font for Japanese. FontFakery has
640    // the differences between desired and actual font style.
641
642    // All Japanese Hiragana characters.
643    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldStyle, &runs);
644    ASSERT_EQ(1U, runs.size());
645    EXPECT_EQ(0, runs[0].start);
646    EXPECT_EQ(5, runs[0].end);
647    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
648    EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeBold());
649    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic());
650
651    // All Japanese Hiragana characters.
652    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kJAItalicStyle, &runs);
653    ASSERT_EQ(1U, runs.size());
654    EXPECT_EQ(0, runs[0].start);
655    EXPECT_EQ(5, runs[0].end);
656    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
657    EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold());
658    EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic());
659
660    // All Japanese Hiragana characters.
661    itemize(collection.get(), "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldItalicStyle, &runs);
662    ASSERT_EQ(1U, runs.size());
663    EXPECT_EQ(0, runs[0].start);
664    EXPECT_EQ(5, runs[0].end);
665    EXPECT_EQ(kJAFont, getFontPath(runs[0]));
666    EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeBold());
667    EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic());
668}
669
670TEST_F(FontCollectionItemizeTest, itemize_vs_sequence_but_no_base_char) {
671    // kVSTestFont supports U+717D U+FE02 but doesn't support U+717D.
672    // kVSTestFont should be selected for U+717D U+FE02 even if it does not support the base code
673    // point.
674    const std::string kVSTestFont = kTestFontDir "VarioationSelectorTest-Regular.ttf";
675
676    std::vector<android::FontFamily*> families;
677    FontFamily* family1 = new FontFamily(android::VARIANT_DEFAULT);
678    family1->addFont(new MinikinFontForTest(kLatinFont));
679    families.push_back(family1);
680
681    FontFamily* family2 = new FontFamily(android::VARIANT_DEFAULT);
682    family2->addFont(new MinikinFontForTest(kVSTestFont));
683    families.push_back(family2);
684
685    FontCollection collection(families);
686
687    std::vector<FontCollection::Run> runs;
688
689    itemize(&collection, "U+717D U+FE02", FontStyle(), &runs);
690    ASSERT_EQ(1U, runs.size());
691    EXPECT_EQ(0, runs[0].start);
692    EXPECT_EQ(2, runs[0].end);
693    EXPECT_EQ(kVSTestFont, getFontPath(runs[0]));
694
695    family1->Unref();
696    family2->Unref();
697}
698
699TEST_F(FontCollectionItemizeTest, itemize_LanguageScore) {
700    struct TestCase {
701        std::string userPreferredLanguages;
702        std::vector<std::string> fontLanguages;
703        int selectedFontIndex;
704    } testCases[] = {
705        // Font can specify empty language.
706        { "und", { "", "" }, 0 },
707        { "und", { "", "en-Latn" }, 0 },
708        { "en-Latn", { "", "" }, 0 },
709        { "en-Latn", { "", "en-Latn" }, 1 },
710
711        // Single user preferred language.
712        // Exact match case
713        { "en-Latn", { "en-Latn", "ja-Jpan" }, 0 },
714        { "ja-Jpan", { "en-Latn", "ja-Jpan" }, 1 },
715        { "en-Latn", { "en-Latn", "nl-Latn", "es-Latn" }, 0 },
716        { "nl-Latn", { "en-Latn", "nl-Latn", "es-Latn" }, 1 },
717        { "es-Latn", { "en-Latn", "nl-Latn", "es-Latn" }, 2 },
718        { "es-Latn", { "en-Latn", "en-Latn", "nl-Latn" }, 0 },
719
720        // Exact script match case
721        { "en-Latn", { "nl-Latn", "e-Latn" }, 0 },
722        { "en-Arab", { "nl-Latn", "ar-Arab" }, 1 },
723        { "en-Latn", { "be-Latn", "ar-Arab", "d-Beng" }, 0 },
724        { "en-Arab", { "be-Latn", "ar-Arab", "d-Beng" }, 1 },
725        { "en-Beng", { "be-Latn", "ar-Arab", "d-Beng" }, 2 },
726        { "en-Beng", { "be-Latn", "ar-Beng", "d-Beng" }, 1 },
727        { "zh-Hant", { "zh-Hant", "zh-Hans" }, 0 },
728        { "zh-Hans", { "zh-Hant", "zh-Hans" }, 1 },
729
730        // Subscript match case, e.g. Jpan supports Hira.
731        { "en-Hira", { "ja-Jpan" }, 0 },
732        { "zh-Hani", { "zh-Hans", "zh-Hant" }, 0 },
733        { "zh-Hani", { "zh-Hant", "zh-Hans" }, 0 },
734        { "en-Hira", { "zh-Hant", "ja-Jpan", "ja-Jpan" }, 1 },
735
736        // Language match case
737        { "ja-Latn", { "zh-Latn", "ja-Latn" }, 1 },
738        { "zh-Latn", { "zh-Latn", "ja-Latn" }, 0 },
739        { "ja-Latn", { "zh-Latn", "ja-Latn" }, 1 },
740        { "ja-Latn", { "zh-Latn", "ja-Latn", "ja-Latn" }, 1 },
741
742        // Mixed case
743        // Script/subscript match is strongest.
744        { "ja-Jpan", { "en-Latn", "ja-Latn", "en-Jpan" }, 2 },
745        { "ja-Hira", { "en-Latn", "ja-Latn", "en-Jpan" }, 2 },
746        { "ja-Hira", { "en-Latn", "ja-Latn", "en-Jpan", "en-Jpan" }, 2 },
747
748        // Language match only happens if the script matches.
749        { "ja-Hira", { "en-Latn", "ja-Latn" }, 0 },
750        { "ja-Hira", { "en-Jpan", "ja-Jpan" }, 1 },
751
752        // Multiple languages.
753        // Even if all fonts have the same score, use the 2nd language for better selection.
754        { "en-Latn,ja-Jpan", { "zh-Hant", "zh-Hans", "ja-Jpan" }, 2 },
755        { "en-Latn,nl-Latn", { "es-Latn", "be-Latn", "nl-Latn" }, 2 },
756        { "en-Latn,br-Latn,nl-Latn", { "es-Latn", "be-Latn", "nl-Latn" }, 2 },
757        { "en-Latn,br-Latn,nl-Latn", { "es-Latn", "be-Latn", "nl-Latn", "nl-Latn" }, 2 },
758
759        // Script score.
760        { "en-Latn,ja-Jpan", { "en-Arab", "en-Jpan" }, 1 },
761        { "en-Latn,ja-Jpan", { "en-Arab", "en-Jpan", "en-Jpan" }, 1 },
762
763        // Language match case
764        { "en-Latn,ja-Latn", { "bd-Latn", "ja-Latn" }, 1 },
765        { "en-Latn,ja-Latn", { "bd-Latn", "ja-Latn", "ja-Latn" }, 1 },
766
767        // Language match only happens if the script matches.
768        { "en-Latn,ar-Arab", { "en-Beng", "ar-Arab" }, 1 },
769
770        // Multiple languages in the font settings.
771        { "ko-Jamo", { "ja-Jpan", "ko-Kore", "ko-Kore,ko-Jamo"}, 2 },
772        { "en-Latn", { "ja-Jpan", "en-Latn,ja-Jpan"}, 1 },
773        { "en-Latn", { "ja-Jpan", "ja-Jpan,en-Latn"}, 1 },
774        { "en-Latn", { "ja-Jpan,zh-Hant", "en-Latn,ja-Jpan", "en-Latn"}, 1 },
775        { "en-Latn", { "zh-Hant,ja-Jpan", "ja-Jpan,en-Latn", "en-Latn"}, 1 },
776
777        // Kore = Hang + Hani, etc.
778        { "ko-Kore", { "ko-Hang", "ko-Jamo,ko-Hani", "ko-Hang,ko-Hani"}, 2 },
779        { "ja-Hrkt", { "ja-Hira", "ja-Kana", "ja-Hira,ja-Kana"}, 2 },
780        { "ja-Jpan", { "ja-Hira", "ja-Kana", "ja-Hani", "ja-Hira,ja-Kana,ja-Hani"}, 3 },
781        { "zh-Hanb", { "zh-Hant", "zh-Bopo", "zh-Hant,zh-Bopo"}, 2 },
782        { "zh-Hanb", { "ja-Hanb", "zh-Hant,zh-Bopo"}, 1 },
783
784        // Language match with unified subscript bits.
785        { "zh-Hanb", { "zh-Hant", "zh-Bopo", "ja-Hant,ja-Bopo", "zh-Hant,zh-Bopo"}, 3 },
786        { "zh-Hanb", { "zh-Hant", "zh-Bopo", "ja-Hant,zh-Bopo", "zh-Hant,zh-Bopo"}, 3 },
787    };
788
789    for (auto testCase : testCases) {
790        std::string fontLanguagesStr = "{";
791        for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) {
792            if (i != 0) {
793                fontLanguagesStr += ", ";
794            }
795            fontLanguagesStr += "\"" + testCase.fontLanguages[i] + "\"";
796        }
797        fontLanguagesStr += "}";
798        SCOPED_TRACE("Test of user preferred languages: \"" + testCase.userPreferredLanguages +
799                     "\" with font languages: " + fontLanguagesStr);
800
801        std::vector<FontFamily*> families;
802
803        // Prepare first font which doesn't supports U+9AA8
804        FontFamily* firstFamily = new FontFamily(
805                FontStyle::registerLanguageList("und"), 0 /* variant */);
806        MinikinFont* firstFamilyMinikinFont = new MinikinFontForTest(kNoGlyphFont);
807        firstFamily->addFont(firstFamilyMinikinFont);
808        families.push_back(firstFamily);
809
810        // Prepare font families
811        // Each font family is associated with a specified language. All font families except for
812        // the first font support U+9AA8.
813        std::unordered_map<MinikinFont*, int> fontLangIdxMap;
814
815        for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) {
816            FontFamily* family = new FontFamily(
817                    FontStyle::registerLanguageList(testCase.fontLanguages[i]), 0 /* variant */);
818            MinikinFont* minikin_font = new MinikinFontForTest(kJAFont);
819            family->addFont(minikin_font);
820            families.push_back(family);
821            fontLangIdxMap.insert(std::make_pair(minikin_font, i));
822        }
823        FontCollection collection(families);
824        for (auto family : families) {
825            family->Unref();
826        }
827
828        // Do itemize
829        const FontStyle style = FontStyle(
830                FontStyle::registerLanguageList(testCase.userPreferredLanguages));
831        std::vector<FontCollection::Run> runs;
832        itemize(&collection, "U+9AA8", style, &runs);
833        ASSERT_EQ(1U, runs.size());
834        ASSERT_NE(nullptr, runs[0].fakedFont.font);
835
836        // First family doesn't support U+9AA8 and others support it, so the first font should not
837        // be selected.
838        EXPECT_NE(firstFamilyMinikinFont, runs[0].fakedFont.font);
839
840        // Lookup used font family by MinikinFont*.
841        const int usedLangIndex = fontLangIdxMap[runs[0].fakedFont.font];
842        EXPECT_EQ(testCase.selectedFontIndex, usedLangIndex);
843    }
844}
845
846TEST_F(FontCollectionItemizeTest, itemize_LanguageAndCoverage) {
847    struct TestCase {
848        std::string testString;
849        std::string requestedLanguages;
850        std::string expectedFont;
851    } testCases[] = {
852        // Following test cases verify that following rules in font fallback chain.
853        // - If the first font in the collection supports the given character or variation sequence,
854        //   it should be selected.
855        // - If the font doesn't support the given character, variation sequence or its base
856        //   character, it should not be selected.
857        // - If two or more fonts match the requested languages, the font matches with the highest
858        //   priority language should be selected.
859        // - If two or more fonts get the same score, the font listed earlier in the XML file
860        //   (here, kItemizeFontXml) should be selected.
861
862        // Regardless of language, the first font is always selected if it covers the code point.
863        { "'a'", "", kLatinFont},
864        { "'a'", "en-Latn", kLatinFont},
865        { "'a'", "ja-Jpan", kLatinFont},
866        { "'a'", "ja-Jpan,en-Latn", kLatinFont},
867        { "'a'", "zh-Hans,zh-Hant,en-Latn,ja-Jpan,fr-Latn", kLatinFont},
868
869        // U+81ED is supported by both the ja font and zh-Hans font.
870        { "U+81ED", "", kZH_HansFont },  // zh-Hans font is listed before ja font.
871        { "U+81ED", "en-Latn", kZH_HansFont },  // zh-Hans font is listed before ja font.
872        { "U+81ED", "ja-Jpan", kJAFont },
873        { "U+81ED", "zh-Hans", kZH_HansFont },
874
875        { "U+81ED", "ja-Jpan,en-Latn", kJAFont },
876        { "U+81ED", "en-Latn,ja-Jpan", kJAFont },
877        { "U+81ED", "en-Latn,zh-Hans", kZH_HansFont },
878        { "U+81ED", "zh-Hans,en-Latn", kZH_HansFont },
879        { "U+81ED", "ja-Jpan,zh-Hans", kJAFont },
880        { "U+81ED", "zh-Hans,ja-Jpan", kZH_HansFont },
881
882        { "U+81ED", "en-Latn,zh-Hans,ja-Jpan", kZH_HansFont },
883        { "U+81ED", "en-Latn,ja-Jpan,zh-Hans", kJAFont },
884        { "U+81ED", "en-Latn,zh-Hans,ja-Jpan", kZH_HansFont },
885        { "U+81ED", "ja-Jpan,en-Latn,zh-Hans", kJAFont },
886        { "U+81ED", "ja-Jpan,zh-Hans,en-Latn", kJAFont },
887        { "U+81ED", "zh-Hans,en-Latn,ja-Jpan", kZH_HansFont },
888        { "U+81ED", "zh-Hans,ja-Jpan,en-Latn", kZH_HansFont },
889
890        // U+304A is only supported by ja font.
891        { "U+304A", "", kJAFont },
892        { "U+304A", "ja-Jpan", kJAFont },
893        { "U+304A", "zh-Hant", kJAFont },
894        { "U+304A", "zh-Hans", kJAFont },
895
896        { "U+304A", "ja-Jpan,zh-Hant", kJAFont },
897        { "U+304A", "zh-Hant,ja-Jpan", kJAFont },
898        { "U+304A", "zh-Hans,zh-Hant", kJAFont },
899        { "U+304A", "zh-Hant,zh-Hans", kJAFont },
900        { "U+304A", "zh-Hans,ja-Jpan", kJAFont },
901        { "U+304A", "ja-Jpan,zh-Hans", kJAFont },
902
903        { "U+304A", "zh-Hans,ja-Jpan,zh-Hant", kJAFont },
904        { "U+304A", "zh-Hans,zh-Hant,ja-Jpan", kJAFont },
905        { "U+304A", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
906        { "U+304A", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
907        { "U+304A", "zh-Hant,zh-Hans,ja-Jpan", kJAFont },
908        { "U+304A", "zh-Hant,ja-Jpan,zh-Hans", kJAFont },
909
910        // U+242EE is supported by both ja font and zh-Hant fonts but not by zh-Hans font.
911        { "U+242EE", "", kJAFont },  // ja font is listed before zh-Hant font.
912        { "U+242EE", "ja-Jpan", kJAFont },
913        { "U+242EE", "zh-Hans", kJAFont },
914        { "U+242EE", "zh-Hant", kZH_HantFont },
915
916        { "U+242EE", "ja-Jpan,zh-Hant", kJAFont },
917        { "U+242EE", "zh-Hant,ja-Jpan", kZH_HantFont },
918        { "U+242EE", "zh-Hans,zh-Hant", kZH_HantFont },
919        { "U+242EE", "zh-Hant,zh-Hans", kZH_HantFont },
920        { "U+242EE", "zh-Hans,ja-Jpan", kJAFont },
921        { "U+242EE", "ja-Jpan,zh-Hans", kJAFont },
922
923        { "U+242EE", "zh-Hans,ja-Jpan,zh-Hant", kJAFont },
924        { "U+242EE", "zh-Hans,zh-Hant,ja-Jpan", kZH_HantFont },
925        { "U+242EE", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
926        { "U+242EE", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
927        { "U+242EE", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont },
928        { "U+242EE", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont },
929
930        // U+9AA8 is supported by all ja-Jpan, zh-Hans, zh-Hant fonts.
931        { "U+9AA8", "", kZH_HansFont },  // zh-Hans font is listed before ja and zh-Hant fonts.
932        { "U+9AA8", "ja-Jpan", kJAFont },
933        { "U+9AA8", "zh-Hans", kZH_HansFont },
934        { "U+9AA8", "zh-Hant", kZH_HantFont },
935
936        { "U+9AA8", "ja-Jpan,zh-Hant", kJAFont },
937        { "U+9AA8", "zh-Hant,ja-Jpan", kZH_HantFont },
938        { "U+9AA8", "zh-Hans,zh-Hant", kZH_HansFont },
939        { "U+9AA8", "zh-Hant,zh-Hans", kZH_HantFont },
940        { "U+9AA8", "zh-Hans,ja-Jpan", kZH_HansFont },
941        { "U+9AA8", "ja-Jpan,zh-Hans", kJAFont },
942
943        { "U+9AA8", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont },
944        { "U+9AA8", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont },
945        { "U+9AA8", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
946        { "U+9AA8", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
947        { "U+9AA8", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont },
948        { "U+9AA8", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont },
949
950        // U+242EE U+FE00 is supported by ja font but not by zh-Hans or zh-Hant fonts.
951        { "U+242EE U+FE00", "", kJAFont },
952        { "U+242EE U+FE00", "ja-Jpan", kJAFont },
953        { "U+242EE U+FE00", "zh-Hant", kJAFont },
954        { "U+242EE U+FE00", "zh-Hans", kJAFont },
955
956        { "U+242EE U+FE00", "ja-Jpan,zh-Hant", kJAFont },
957        { "U+242EE U+FE00", "zh-Hant,ja-Jpan", kJAFont },
958        { "U+242EE U+FE00", "zh-Hans,zh-Hant", kJAFont },
959        { "U+242EE U+FE00", "zh-Hant,zh-Hans", kJAFont },
960        { "U+242EE U+FE00", "zh-Hans,ja-Jpan", kJAFont },
961        { "U+242EE U+FE00", "ja-Jpan,zh-Hans", kJAFont },
962
963        { "U+242EE U+FE00", "zh-Hans,ja-Jpan,zh-Hant", kJAFont },
964        { "U+242EE U+FE00", "zh-Hans,zh-Hant,ja-Jpan", kJAFont },
965        { "U+242EE U+FE00", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
966        { "U+242EE U+FE00", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
967        { "U+242EE U+FE00", "zh-Hant,zh-Hans,ja-Jpan", kJAFont },
968        { "U+242EE U+FE00", "zh-Hant,ja-Jpan,zh-Hans", kJAFont },
969
970        // U+3402 U+E0100 is supported by both zh-Hans and zh-Hant but not by ja font.
971        { "U+3402 U+E0100", "", kZH_HansFont },  // zh-Hans font is listed before zh-Hant font.
972        { "U+3402 U+E0100", "ja-Jpan", kZH_HansFont },  // zh-Hans font is listed before zh-Hant font.
973        { "U+3402 U+E0100", "zh-Hant", kZH_HantFont },
974        { "U+3402 U+E0100", "zh-Hans", kZH_HansFont },
975
976        { "U+3402 U+E0100", "ja-Jpan,zh-Hant", kZH_HantFont },
977        { "U+3402 U+E0100", "zh-Hant,ja-Jpan", kZH_HantFont },
978        { "U+3402 U+E0100", "zh-Hans,zh-Hant", kZH_HansFont },
979        { "U+3402 U+E0100", "zh-Hant,zh-Hans", kZH_HantFont },
980        { "U+3402 U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont },
981        { "U+3402 U+E0100", "ja-Jpan,zh-Hans", kZH_HansFont },
982
983        { "U+3402 U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont },
984        { "U+3402 U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont },
985        { "U+3402 U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kZH_HansFont },
986        { "U+3402 U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kZH_HantFont },
987        { "U+3402 U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont },
988        { "U+3402 U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont },
989
990        // No font supports U+4444 U+FE00 but only zh-Hans supports its base character U+4444.
991        { "U+4444 U+FE00", "", kZH_HansFont },
992        { "U+4444 U+FE00", "ja-Jpan", kZH_HansFont },
993        { "U+4444 U+FE00", "zh-Hant", kZH_HansFont },
994        { "U+4444 U+FE00", "zh-Hans", kZH_HansFont },
995
996        { "U+4444 U+FE00", "ja-Jpan,zh-Hant", kZH_HansFont },
997        { "U+4444 U+FE00", "zh-Hant,ja-Jpan", kZH_HansFont },
998        { "U+4444 U+FE00", "zh-Hans,zh-Hant", kZH_HansFont },
999        { "U+4444 U+FE00", "zh-Hant,zh-Hans", kZH_HansFont },
1000        { "U+4444 U+FE00", "zh-Hans,ja-Jpan", kZH_HansFont },
1001        { "U+4444 U+FE00", "ja-Jpan,zh-Hans", kZH_HansFont },
1002
1003        { "U+4444 U+FE00", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont },
1004        { "U+4444 U+FE00", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont },
1005        { "U+4444 U+FE00", "ja-Jpan,zh-Hans,zh-Hant", kZH_HansFont },
1006        { "U+4444 U+FE00", "ja-Jpan,zh-Hant,zh-Hans", kZH_HansFont },
1007        { "U+4444 U+FE00", "zh-Hant,zh-Hans,ja-Jpan", kZH_HansFont },
1008        { "U+4444 U+FE00", "zh-Hant,ja-Jpan,zh-Hans", kZH_HansFont },
1009
1010        // No font supports U+81ED U+E0100 but ja and zh-Hans support its base character U+81ED.
1011        // zh-Hans font is listed before ja font.
1012        { "U+81ED U+E0100", "", kZH_HansFont },
1013        { "U+81ED U+E0100", "ja-Jpan", kJAFont },
1014        { "U+81ED U+E0100", "zh-Hant", kZH_HansFont },
1015        { "U+81ED U+E0100", "zh-Hans", kZH_HansFont },
1016
1017        { "U+81ED U+E0100", "ja-Jpan,zh-Hant", kJAFont },
1018        { "U+81ED U+E0100", "zh-Hant,ja-Jpan", kJAFont },
1019        { "U+81ED U+E0100", "zh-Hans,zh-Hant", kZH_HansFont },
1020        { "U+81ED U+E0100", "zh-Hant,zh-Hans", kZH_HansFont },
1021        { "U+81ED U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont },
1022        { "U+81ED U+E0100", "ja-Jpan,zh-Hans", kJAFont },
1023
1024        { "U+81ED U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont },
1025        { "U+81ED U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont },
1026        { "U+81ED U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
1027        { "U+81ED U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
1028        { "U+81ED U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HansFont },
1029        { "U+81ED U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kJAFont },
1030
1031        // No font supports U+9AA8 U+E0100 but all zh-Hans zh-hant ja fonts support its base
1032        // character U+9AA8.
1033        // zh-Hans font is listed before ja and zh-Hant fonts.
1034        { "U+9AA8 U+E0100", "", kZH_HansFont },
1035        { "U+9AA8 U+E0100", "ja-Jpan", kJAFont },
1036        { "U+9AA8 U+E0100", "zh-Hans", kZH_HansFont },
1037        { "U+9AA8 U+E0100", "zh-Hant", kZH_HantFont },
1038
1039        { "U+9AA8 U+E0100", "ja-Jpan,zh-Hant", kJAFont },
1040        { "U+9AA8 U+E0100", "zh-Hant,ja-Jpan", kZH_HantFont },
1041        { "U+9AA8 U+E0100", "zh-Hans,zh-Hant", kZH_HansFont },
1042        { "U+9AA8 U+E0100", "zh-Hant,zh-Hans", kZH_HantFont },
1043        { "U+9AA8 U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont },
1044        { "U+9AA8 U+E0100", "ja-Jpan,zh-Hans", kJAFont },
1045
1046        { "U+9AA8 U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont },
1047        { "U+9AA8 U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont },
1048        { "U+9AA8 U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
1049        { "U+9AA8 U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
1050        { "U+9AA8 U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont },
1051        { "U+9AA8 U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont },
1052
1053        // All zh-Hans,zh-Hant,ja fonts support U+35A8 U+E0100 and its base character U+35A8.
1054        // zh-Hans font is listed before ja and zh-Hant fonts.
1055        { "U+35A8", "", kZH_HansFont },
1056        { "U+35A8", "ja-Jpan", kJAFont },
1057        { "U+35A8", "zh-Hans", kZH_HansFont },
1058        { "U+35A8", "zh-Hant", kZH_HantFont },
1059
1060        { "U+35A8", "ja-Jpan,zh-Hant", kJAFont },
1061        { "U+35A8", "zh-Hant,ja-Jpan", kZH_HantFont },
1062        { "U+35A8", "zh-Hans,zh-Hant", kZH_HansFont },
1063        { "U+35A8", "zh-Hant,zh-Hans", kZH_HantFont },
1064        { "U+35A8", "zh-Hans,ja-Jpan", kZH_HansFont },
1065        { "U+35A8", "ja-Jpan,zh-Hans", kJAFont },
1066
1067        { "U+35A8", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont },
1068        { "U+35A8", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont },
1069        { "U+35A8", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
1070        { "U+35A8", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
1071        { "U+35A8", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont },
1072        { "U+35A8", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont },
1073
1074        // All zh-Hans,zh-Hant,ja fonts support U+35B6 U+E0100, but zh-Hant and ja fonts support its
1075        // base character U+35B6.
1076        // ja font is listed before zh-Hant font.
1077        { "U+35B6", "", kJAFont },
1078        { "U+35B6", "ja-Jpan", kJAFont },
1079        { "U+35B6", "zh-Hant", kZH_HantFont },
1080        { "U+35B6", "zh-Hans", kJAFont },
1081
1082        { "U+35B6", "ja-Jpan,zh-Hant", kJAFont },
1083        { "U+35B6", "zh-Hant,ja-Jpan", kZH_HantFont },
1084        { "U+35B6", "zh-Hans,zh-Hant", kZH_HantFont },
1085        { "U+35B6", "zh-Hant,zh-Hans", kZH_HantFont },
1086        { "U+35B6", "zh-Hans,ja-Jpan", kJAFont },
1087        { "U+35B6", "ja-Jpan,zh-Hans", kJAFont },
1088
1089        { "U+35B6", "zh-Hans,ja-Jpan,zh-Hant", kJAFont },
1090        { "U+35B6", "zh-Hans,zh-Hant,ja-Jpan", kZH_HantFont },
1091        { "U+35B6", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
1092        { "U+35B6", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
1093        { "U+35B6", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont },
1094        { "U+35B6", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont },
1095
1096        // All zh-Hans,zh-Hant,ja fonts support U+35C5 U+E0100, but only ja font supports its base
1097        // character U+35C5.
1098        { "U+35C5", "", kJAFont },
1099        { "U+35C5", "ja-Jpan", kJAFont },
1100        { "U+35C5", "zh-Hant", kJAFont },
1101        { "U+35C5", "zh-Hans", kJAFont },
1102
1103        { "U+35C5", "ja-Jpan,zh-Hant", kJAFont },
1104        { "U+35C5", "zh-Hant,ja-Jpan", kJAFont },
1105        { "U+35C5", "zh-Hans,zh-Hant", kJAFont },
1106        { "U+35C5", "zh-Hant,zh-Hans", kJAFont },
1107        { "U+35C5", "zh-Hans,ja-Jpan", kJAFont },
1108        { "U+35C5", "ja-Jpan,zh-Hans", kJAFont },
1109
1110        { "U+35C5", "zh-Hans,ja-Jpan,zh-Hant", kJAFont },
1111        { "U+35C5", "zh-Hans,zh-Hant,ja-Jpan", kJAFont },
1112        { "U+35C5", "ja-Jpan,zh-Hans,zh-Hant", kJAFont },
1113        { "U+35C5", "ja-Jpan,zh-Hant,zh-Hans", kJAFont },
1114        { "U+35C5", "zh-Hant,zh-Hans,ja-Jpan", kJAFont },
1115        { "U+35C5", "zh-Hant,ja-Jpan,zh-Hans", kJAFont },
1116
1117        // None of ja-Jpan, zh-Hant, zh-Hans font supports U+1F469. Emoji font supports it.
1118        { "U+1F469", "", kEmojiFont },
1119        { "U+1F469", "ja-Jpan", kEmojiFont },
1120        { "U+1F469", "zh-Hant", kEmojiFont },
1121        { "U+1F469", "zh-Hans", kEmojiFont },
1122
1123        { "U+1F469", "ja-Jpan,zh-Hant", kEmojiFont },
1124        { "U+1F469", "zh-Hant,ja-Jpan", kEmojiFont },
1125        { "U+1F469", "zh-Hans,zh-Hant", kEmojiFont },
1126        { "U+1F469", "zh-Hant,zh-Hans", kEmojiFont },
1127        { "U+1F469", "zh-Hans,ja-Jpan", kEmojiFont },
1128        { "U+1F469", "ja-Jpan,zh-Hans", kEmojiFont },
1129
1130        { "U+1F469", "zh-Hans,ja-Jpan,zh-Hant", kEmojiFont },
1131        { "U+1F469", "zh-Hans,zh-Hant,ja-Jpan", kEmojiFont },
1132        { "U+1F469", "ja-Jpan,zh-Hans,zh-Hant", kEmojiFont },
1133        { "U+1F469", "ja-Jpan,zh-Hant,zh-Hans", kEmojiFont },
1134        { "U+1F469", "zh-Hant,zh-Hans,ja-Jpan", kEmojiFont },
1135        { "U+1F469", "zh-Hant,ja-Jpan,zh-Hans", kEmojiFont },
1136    };
1137
1138    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kItemizeFontXml));
1139
1140    for (auto testCase : testCases) {
1141        SCOPED_TRACE("Test for \"" + testCase.testString + "\" with languages " +
1142                     testCase.requestedLanguages);
1143
1144        std::vector<FontCollection::Run> runs;
1145        const FontStyle style =
1146                FontStyle(FontStyle::registerLanguageList(testCase.requestedLanguages));
1147        itemize(collection.get(), testCase.testString.c_str(), style, &runs);
1148        ASSERT_EQ(1U, runs.size());
1149        EXPECT_EQ(testCase.expectedFont, getFontPath(runs[0]));
1150    }
1151}
1152
1153TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0E) {
1154    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
1155    std::vector<FontCollection::Run> runs;
1156
1157    const FontStyle kDefaultFontStyle;
1158
1159    // U+00A9 is a text default emoji which is only available in TextEmojiFont.ttf.
1160    // TextEmojiFont.ttf should be selected.
1161    itemize(collection.get(), "U+00A9 U+FE0E", kDefaultFontStyle, &runs);
1162    ASSERT_EQ(1U, runs.size());
1163    EXPECT_EQ(0, runs[0].start);
1164    EXPECT_EQ(2, runs[0].end);
1165    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1166
1167    // U+00A9 is a text default emoji which is only available in ColorEmojiFont.ttf.
1168    // ColorEmojiFont.ttf should be selected.
1169    itemize(collection.get(), "U+00AE U+FE0E", kDefaultFontStyle, &runs);
1170    ASSERT_EQ(1U, runs.size());
1171    EXPECT_EQ(0, runs[0].start);
1172    EXPECT_EQ(2, runs[0].end);
1173    // Text emoji is specified but it is not available. Use color emoji instead.
1174    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1175
1176    // U+203C is a text default emoji which is available in both TextEmojiFont.ttf and
1177    // ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected.
1178    itemize(collection.get(), "U+203C U+FE0E", kDefaultFontStyle, &runs);
1179    ASSERT_EQ(1U, runs.size());
1180    EXPECT_EQ(0, runs[0].start);
1181    EXPECT_EQ(2, runs[0].end);
1182    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1183
1184    // U+2049 is a text default emoji which is not available either TextEmojiFont.ttf or
1185    // ColorEmojiFont.ttf. No font should be selected.
1186    itemize(collection.get(), "U+2049 U+FE0E", kDefaultFontStyle, &runs);
1187    ASSERT_EQ(1U, runs.size());
1188    EXPECT_EQ(0, runs[0].start);
1189    EXPECT_EQ(2, runs[0].end);
1190    EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
1191
1192    // U+231A is a emoji default emoji which is available only in TextEmojifFont.
1193    // TextEmojiFont.ttf sohuld be selected.
1194    itemize(collection.get(), "U+231A U+FE0E", kDefaultFontStyle, &runs);
1195    ASSERT_EQ(1U, runs.size());
1196    EXPECT_EQ(0, runs[0].start);
1197    EXPECT_EQ(2, runs[0].end);
1198    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1199
1200    // U+231B is a emoji default emoji which is available only in ColorEmojiFont.ttf.
1201    // ColorEmojiFont.ttf should be selected.
1202    itemize(collection.get(), "U+231B U+FE0E", kDefaultFontStyle, &runs);
1203    ASSERT_EQ(1U, runs.size());
1204    EXPECT_EQ(0, runs[0].start);
1205    EXPECT_EQ(2, runs[0].end);
1206    // Text emoji is specified but it is not available. Use color emoji instead.
1207    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1208
1209    // U+23E9 is a emoji default emoji which is available in both TextEmojiFont.ttf and
1210    // ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected even if U+23E9 is emoji default
1211    // emoji since U+FE0E is appended.
1212    itemize(collection.get(), "U+23E9 U+FE0E", kDefaultFontStyle, &runs);
1213    ASSERT_EQ(1U, runs.size());
1214    EXPECT_EQ(0, runs[0].start);
1215    EXPECT_EQ(2, runs[0].end);
1216    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1217
1218    // U+23EA is a emoji default emoji but which is not available in either TextEmojiFont.ttf or
1219    // ColorEmojiFont.ttf. No font should be selected.
1220    itemize(collection.get(), "U+23EA U+FE0E", kDefaultFontStyle, &runs);
1221    ASSERT_EQ(1U, runs.size());
1222    EXPECT_EQ(0, runs[0].start);
1223    EXPECT_EQ(2, runs[0].end);
1224    EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
1225
1226    // U+26FA U+FE0E is specified but ColorTextMixedEmojiFont has a variation sequence U+26F9 U+FE0F
1227    // in its cmap, so ColorTextMixedEmojiFont should be selected instaed of ColorEmojiFont.
1228    itemize(collection.get(), "U+26FA U+FE0E", kDefaultFontStyle, &runs);
1229    ASSERT_EQ(1U, runs.size());
1230    EXPECT_EQ(0, runs[0].start);
1231    EXPECT_EQ(2, runs[0].end);
1232    EXPECT_EQ(kMixedEmojiFont, getFontPath(runs[0]));
1233}
1234
1235TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0F) {
1236    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
1237    std::vector<FontCollection::Run> runs;
1238
1239    const FontStyle kDefaultFontStyle;
1240
1241    // U+00A9 is a text default emoji which is available only in TextEmojiFont.ttf.
1242    // TextEmojiFont.ttf shoudl be selected.
1243    itemize(collection.get(), "U+00A9 U+FE0F", kDefaultFontStyle, &runs);
1244    ASSERT_EQ(1U, runs.size());
1245    EXPECT_EQ(0, runs[0].start);
1246    EXPECT_EQ(2, runs[0].end);
1247    // Color emoji is specified but it is not available. Use text representaion instead.
1248    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1249
1250    // U+00AE is a text default emoji which is available only in ColorEmojiFont.ttf.
1251    // ColorEmojiFont.ttf should be selected.
1252    itemize(collection.get(), "U+00AE U+FE0F", kDefaultFontStyle, &runs);
1253    ASSERT_EQ(1U, runs.size());
1254    EXPECT_EQ(0, runs[0].start);
1255    EXPECT_EQ(2, runs[0].end);
1256    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1257
1258    // U+203C is a text default emoji which is available in both TextEmojiFont.ttf and
1259    // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected even if U+203C is a text default
1260    // emoji since U+FF0F is appended.
1261    itemize(collection.get(), "U+203C U+FE0F", kDefaultFontStyle, &runs);
1262    ASSERT_EQ(1U, runs.size());
1263    EXPECT_EQ(0, runs[0].start);
1264    EXPECT_EQ(2, runs[0].end);
1265    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1266
1267    // U+2049 is a text default emoji which is not available in either TextEmojiFont.ttf or
1268    // ColorEmojiFont.ttf. No font should be selected.
1269    itemize(collection.get(), "U+2049 U+FE0F", kDefaultFontStyle, &runs);
1270    ASSERT_EQ(1U, runs.size());
1271    EXPECT_EQ(0, runs[0].start);
1272    EXPECT_EQ(2, runs[0].end);
1273    EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
1274
1275    // U+231A is a emoji default emoji which is available only in TextEmojiFont.ttf.
1276    // TextEmojiFont.ttf should be selected.
1277    itemize(collection.get(), "U+231A U+FE0F", kDefaultFontStyle, &runs);
1278    ASSERT_EQ(1U, runs.size());
1279    EXPECT_EQ(0, runs[0].start);
1280    EXPECT_EQ(2, runs[0].end);
1281    // Color emoji is specified but it is not available. Use text representation instead.
1282    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1283
1284    // U+231B is a emoji default emoji which is available only in ColorEmojiFont.ttf.
1285    // ColorEmojiFont.ttf should be selected.
1286    itemize(collection.get(), "U+231B U+FE0F", kDefaultFontStyle, &runs);
1287    ASSERT_EQ(1U, runs.size());
1288    EXPECT_EQ(0, runs[0].start);
1289    EXPECT_EQ(2, runs[0].end);
1290    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1291
1292    // U+23E9 is a emoji default emoji which is available in both TextEmojiFont.ttf and
1293    // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected.
1294    itemize(collection.get(), "U+23E9 U+FE0F", kDefaultFontStyle, &runs);
1295    ASSERT_EQ(1U, runs.size());
1296    EXPECT_EQ(0, runs[0].start);
1297    EXPECT_EQ(2, runs[0].end);
1298    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1299
1300    // U+23EA is a emoji default emoji which is not available in either TextEmojiFont.ttf or
1301    // ColorEmojiFont.ttf. No font should be selected.
1302    itemize(collection.get(), "U+23EA U+FE0F", kDefaultFontStyle, &runs);
1303    ASSERT_EQ(1U, runs.size());
1304    EXPECT_EQ(0, runs[0].start);
1305    EXPECT_EQ(2, runs[0].end);
1306    EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
1307
1308    // U+26F9 U+FE0F is specified but ColorTextMixedEmojiFont has a variation sequence U+26F9 U+FE0F
1309    // in its cmap, so ColorTextMixedEmojiFont should be selected instaed of ColorEmojiFont.
1310    itemize(collection.get(), "U+26F9 U+FE0F", kDefaultFontStyle, &runs);
1311    ASSERT_EQ(1U, runs.size());
1312    EXPECT_EQ(0, runs[0].start);
1313    EXPECT_EQ(2, runs[0].end);
1314    EXPECT_EQ(kMixedEmojiFont, getFontPath(runs[0]));
1315}
1316
1317TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_with_skinTone) {
1318    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
1319    std::vector<FontCollection::Run> runs;
1320
1321    const FontStyle kDefaultFontStyle;
1322
1323    // TextEmoji font is selected since it is listed before ColorEmoji font.
1324    itemize(collection.get(), "U+261D", kDefaultFontStyle, &runs);
1325    ASSERT_EQ(1U, runs.size());
1326    EXPECT_EQ(0, runs[0].start);
1327    EXPECT_EQ(1, runs[0].end);
1328    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1329
1330    // If skin tone is specified, it should be colored.
1331    itemize(collection.get(), "U+261D U+1F3FD", kDefaultFontStyle, &runs);
1332    ASSERT_EQ(1U, runs.size());
1333    EXPECT_EQ(0, runs[0].start);
1334    EXPECT_EQ(3, runs[0].end);
1335    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1336
1337    // Still color font is selected if an emoji variation selector is specified.
1338    itemize(collection.get(), "U+261D U+FE0F U+1F3FD", kDefaultFontStyle, &runs);
1339    ASSERT_EQ(1U, runs.size());
1340    EXPECT_EQ(0, runs[0].start);
1341    EXPECT_EQ(4, runs[0].end);
1342    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0]));
1343
1344    // Text font should be selected if a text variation selector is specified and skin tone is
1345    // rendered by itself.
1346    itemize(collection.get(), "U+261D U+FE0E U+1F3FD", kDefaultFontStyle, &runs);
1347    ASSERT_EQ(2U, runs.size());
1348    EXPECT_EQ(0, runs[0].start);
1349    EXPECT_EQ(2, runs[0].end);
1350    EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0]));
1351    EXPECT_EQ(2, runs[1].start);
1352    EXPECT_EQ(4, runs[1].end);
1353    EXPECT_EQ(kColorEmojiFont, getFontPath(runs[1]));
1354}
1355
1356TEST_F(FontCollectionItemizeTest, itemize_PrivateUseArea) {
1357    MinikinAutoUnref<FontCollection> collection(getFontCollection(kTestFontDir, kEmojiXmlFile));
1358    std::vector<FontCollection::Run> runs;
1359
1360    const FontStyle kDefaultFontStyle;
1361
1362    // Should not set nullptr to the result run. (Issue 26808815)
1363    itemize(collection.get(), "U+FEE10", kDefaultFontStyle, &runs);
1364    ASSERT_EQ(1U, runs.size());
1365    EXPECT_EQ(0, runs[0].start);
1366    EXPECT_EQ(2, runs[0].end);
1367    EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
1368
1369    itemize(collection.get(), "U+FEE40 U+FE4C5", kDefaultFontStyle, &runs);
1370    ASSERT_EQ(1U, runs.size());
1371    EXPECT_EQ(0, runs[0].start);
1372    EXPECT_EQ(4, runs[0].end);
1373    EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0]));
1374}
1375