GraphemeBreak.cpp revision adfa580f1f067c846509b4346e5be2cb19177c1b
1edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project/*
2edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * Copyright (C) 2014 The Android Open Source Project
3edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project *
4edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License");
5edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * you may not use this file except in compliance with the License.
6edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * You may obtain a copy of the License at
7edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project *
8edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project *      http://www.apache.org/licenses/LICENSE-2.0
9edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project *
10edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * Unless required by applicable law or agreed to in writing, software
11edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS,
12edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * See the License for the specific language governing permissions and
14edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * limitations under the License.
15edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project */
16edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
17edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <stdint.h>
18edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <unicode/uchar.h>
19edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <unicode/utf16.h>
20edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
21edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <minikin/GraphemeBreak.h>
22edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
23edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectnamespace android {
24edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
25edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectint32_t tailoredGraphemeClusterBreak(uint32_t c) {
26edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Characters defined as Control that we want to treat them as Extend.
27edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // These are curated manually.
28edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (c == 0x00AD                         // SHY
29edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x061C                  // ALM
30edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x180E                  // MONGOLIAN VOWEL SEPARATOR
31edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x200B                  // ZWSP
32edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x200E                  // LRM
33edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x200F                  // RLM
34edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO
35edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || ((c | 0xF) == 0x206F)        // WJ, invisible math operators, LRI, RLI, FSI, PDI,
36edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project                                            // and the deprecated invisible format controls
37edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0xFEFF                  // BOM
38edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || ((c | 0x7F) == 0xE007F))     // recently undeprecated tag characters in Plane 14
39edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return U_GCB_EXTEND;
40edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // UTC-approved characters for the Prepend class, per
41edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // http://www.unicode.org/L2/L2015/15183r-graph-cluster-brk.txt
42edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // These should be removed when our copy of ICU gets updated to Unicode 9.0 (~2016 or 2017).
43edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    else if ((0x0600 <= c && c <= 0x0605) // Arabic subtending marks
44edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x06DD                // ARABIC SUBTENDING MARK
45edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x070F                // SYRIAC ABBREVIATION MARK
46edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x0D4E                // MALAYALAM LETTER DOT REPH
47edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x110BD               // KAITHI NUMBER SIGN
48edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x111C2               // SHARADA SIGN JIHVAMULIYA
49edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            || c == 0x111C3)              // SHARADA SIGN UPADHMANIYA
50edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return U_GCB_PREPEND;
51edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they
52edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // allow a grapheme break before it.
53edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    else if (c == 0x0E33)
54edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return U_GCB_OTHER;
55edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    else
56edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
57edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project}
58edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
// Tells whether c is one of the characters whose IndicSyllabicCategory is Pure_Killer.
// The list is taken from http://www.unicode.org/Public/8.0.0/ucd/IndicSyllabicCategory.txt
bool isPureKiller(uint32_t c) {
    switch (c) {
    case 0x0E3A:
    case 0x0E4E:
    case 0x0F84:
    case 0x103A:
    case 0x1714:
    case 0x1734:
    case 0x17D1:
    case 0x1BAA:
    case 0x1BF2:
    case 0x1BF3:
    case 0xA806:
    case 0xA953:
    case 0xABED:
    case 0x11134:
    case 0x112EA:
    case 0x1172B:
        return true;
    default:
        return false;
    }
}
66edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
// Tells whether c is a character that can sit directly before or after a ZWJ inside an emoji
// zwj sequence. See http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
bool isZwjEmoji(uint32_t c) {
    switch (c) {
    case 0x2764:    // HEAVY BLACK HEART
    case 0x1F441:   // EYE
    case 0x1F466:   // BOY
    case 0x1F467:   // GIRL
    case 0x1F468:   // MAN
    case 0x1F469:   // WOMAN
    case 0x1F48B:   // KISS MARK
    case 0x1F5E8:   // LEFT SPEECH BUBBLE
        return true;
    default:
        return false;
    }
}
79edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
// Tells whether c lies in the emoji modifier range U+1F3FB..U+1F3FF (both ends included).
// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt
bool isEmojiModifier(uint32_t c) {
    const uint32_t firstModifier = 0x1F3FB;
    const uint32_t lastModifier = 0x1F3FF;
    if (c < firstModifier) {
        return false;
    }
    return c <= lastModifier;
}
84edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
// Returns true if c is listed below as an emoji modifier base, i.e. a character that forms a
// single cluster with a following emoji modifier. Based on Emoji_Modifier_Base from
// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
bool isEmojiBase(uint32_t c) {
    if (0x261D <= c && c <= 0x270D) {
        return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
    } else if (0x1F385 <= c && c <= 0x1F93E) {
        // Every range needs both bounds joined with &&. Writing (lo <= c || c <= hi) is true
        // for any c, which would make this whole branch accept every code point in
        // U+1F385..U+1F93E.
        return (c == 0x1F385
                || (0x1F3C3 <= c && c <= 0x1F3C4)
                || (0x1F3CA <= c && c <= 0x1F3CB)
                || (0x1F442 <= c && c <= 0x1F443)
                || (0x1F446 <= c && c <= 0x1F450)
                || (0x1F466 <= c && c <= 0x1F469)
                || c == 0x1F46E
                || (0x1F470 <= c && c <= 0x1F478)
                || c == 0x1F47C
                || (0x1F481 <= c && c <= 0x1F483)
                || (0x1F485 <= c && c <= 0x1F487)
                || c == 0x1F4AA
                || c == 0x1F575
                || c == 0x1F57A
                || c == 0x1F590
                || (0x1F595 <= c && c <= 0x1F596)
                || (0x1F645 <= c && c <= 0x1F647)
                || (0x1F64B <= c && c <= 0x1F64F)
                || c == 0x1F6A3
                || (0x1F6B4 <= c && c <= 0x1F6B6)
                || c == 0x1F6C0
                || (0x1F918 <= c && c <= 0x1F91E)
                || c == 0x1F926
                || c == 0x1F930
                || (0x1F933 <= c && c <= 0x1F939)
                || (0x1F93B <= c && c <= 0x1F93E));
    } else {
        // Outside both candidate windows nothing is a modifier base.
        return false;
    }
}
121edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
122edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectbool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
123edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        size_t offset) {
124edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // This implementation closely follows Unicode Standard Annex #29 on
125edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
126edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // implementing a tailored version of extended grapheme clusters.
127edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.
128edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
129edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB1, sot ÷; Rule GB2, ÷ eot
130edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (offset <= start || offset >= start + count) {
131edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return true;
132edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
133edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (U16_IS_TRAIL(buf[offset])) {
134edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break
135edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return !U16_IS_LEAD(buf[offset - 1]);
136edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
137edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    uint32_t c1 = 0;
138edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    uint32_t c2 = 0;
139edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    size_t offset_back = offset;
140edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    U16_PREV(buf, start, offset_back, c1);
141edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    U16_NEXT(buf, offset, start + count, c2);
142edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    int32_t p1 = tailoredGraphemeClusterBreak(c1);
143edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    int32_t p2 = tailoredGraphemeClusterBreak(c2);
144edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB3, CR x LF
145edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
146edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
147edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
148edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB4, (Control | CR | LF) ÷
149edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
150edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return true;
151edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
152edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB5, ÷ (Control | CR | LF)
153edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
154edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return true;
155edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
156edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB6, L x ( L | V | LV | LVT )
157edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
158edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
159edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
160edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB7, ( LV | V ) x ( V | T )
161edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
162edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
163edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
164edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB8, ( LVT | T ) x T
165edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
166edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
167edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
168edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB8a, Regional_Indicator x Regional_Indicator
169edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    //
170edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Known limitation: This is overly conservative, and returns no grapheme breaks between two
171edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // flags, such as in the character sequence "U+1F1FA U+1F1F8 [potential break] U+1F1FA U+1F1F8".
172edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Also, it assumes that all combinations of Regional Indicators produce a flag, where they
173edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // don't.
174edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    //
175edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // There is no easy solution for doing this correctly, except for querying the font and doing
176edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // some lookback.
177edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
178edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
179edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
180edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
181edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
182edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
183edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
184edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Cluster indic syllables together (tailoring of UAX #29)
185edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Known limitation: this is overly conservative, and assumes that the virama may form a
186edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // conjunct with the following letter, which doesn't always happen.
187edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    //
188edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // There is no easy solution to do this correctly. Even querying the font does not help (with
189edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // the current font technoloies), since the font may be creating the conjunct using multiple
190edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // glyphs, while the user may be perceiving that sequence of glyphs as one conjunct or one
191edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // letter.
192edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
193edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            && !isPureKiller(c1)
194edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
195edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        return false;
196edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
197edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Tailoring: make emoji sequences with ZWJ a single grapheme cluster
198edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) {
199edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        // look at character before ZWJ to see that both can participate in an emoji zwj sequence
200edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        uint32_t c0 = 0;
201edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        U16_PREV(buf, start, offset_back, c0);
202edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (c0 == 0xFE0F && offset_back > start) {
203edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            // skip over emoji variation selector
204edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            U16_PREV(buf, start, offset_back, c0);
205edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
206edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (isZwjEmoji(c0)) {
207edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            return false;
208edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
209edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
210edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
211edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // E_Base x E_Modifier
212edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    if (isEmojiModifier(c2)) {
213edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (c1 == 0xFE0F && offset_back > start) {
214edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            // skip over emoji variation selector
215edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            U16_PREV(buf, start, offset_back, c1);
216edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
217edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (isEmojiBase(c1)) {
218edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            return false;
219edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
220edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
221edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    // Rule GB10, Any ÷ Any
222edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    return true;
223edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project}
224edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
225edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectsize_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
226edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        size_t offset, MoveOpt opt) {
227edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    switch (opt) {
228edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    case AFTER:
229edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (offset < start + count) {
230edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            offset++;
231edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
232edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        // fall through
233edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    case AT_OR_AFTER:
234edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        while (!isGraphemeBreak(buf, start, count, offset)) {
235edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            offset++;
236edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
237edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        break;
238edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    case BEFORE:
239edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (offset > start) {
240edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            offset--;
241edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
242edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        // fall through
243edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    case AT_OR_BEFORE:
244edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        while (!isGraphemeBreak(buf, start, count, offset)) {
245edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            offset--;
246edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
247edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        break;
248edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    case AT:
249edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        if (!isGraphemeBreak(buf, start, count, offset)) {
250edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project            offset = (size_t)-1;
251edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        }
252edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project        break;
253edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    }
254edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project    return offset;
255edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project}
256edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project
257edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project}  // namespace android
258edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project