GraphemeBreak.cpp revision adfa580f1f067c846509b4346e5be2cb19177c1b
1edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project/* 2edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * Copyright (C) 2014 The Android Open Source Project 3edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * 4edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 5edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * you may not use this file except in compliance with the License. 6edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * You may obtain a copy of the License at 7edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * 8edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 9edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * 10edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * Unless required by applicable law or agreed to in writing, software 11edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 12edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * See the License for the specific language governing permissions and 14edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project * limitations under the License. 15edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project */ 16edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 17edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <stdint.h> 18edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <unicode/uchar.h> 19edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <unicode/utf16.h> 20edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 21edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project#include <minikin/GraphemeBreak.h> 22edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 23edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectnamespace android { 24edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 25edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectint32_t tailoredGraphemeClusterBreak(uint32_t c) { 26edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Characters defined as Control that we want to treat them as Extend. 27edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // These are curated manually. 28edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (c == 0x00AD // SHY 29edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x061C // ALM 30edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x180E // MONGOLIAN VOWEL SEPARATOR 31edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x200B // ZWSP 32edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x200E // LRM 33edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x200F // RLM 34edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO 35edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || ((c | 0xF) == 0x206F) // WJ, invisible math operators, LRI, RLI, FSI, PDI, 36edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // and the deprecated invisible format controls 37edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0xFEFF // BOM 38edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || ((c | 0x7F) == 0xE007F)) // recently undeprecated tag characters in Plane 14 39edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return U_GCB_EXTEND; 40edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // UTC-approved characters for the Prepend class, per 41edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // http://www.unicode.org/L2/L2015/15183r-graph-cluster-brk.txt 42edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // These should be removed when our copy of ICU gets updated to Unicode 9.0 (~2016 or 2017). 43edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project else if ((0x0600 <= c && c <= 0x0605) // Arabic subtending marks 44edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x06DD // ARABIC SUBTENDING MARK 45edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x070F // SYRIAC ABBREVIATION MARK 46edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x0D4E // MALAYALAM LETTER DOT REPH 47edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x110BD // KAITHI NUMBER SIGN 48edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x111C2 // SHARADA SIGN JIHVAMULIYA 49edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x111C3) // SHARADA SIGN UPADHMANIYA 50edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return U_GCB_PREPEND; 51edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they 52edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // allow a grapheme break before it. 53edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project else if (c == 0x0E33) 54edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return U_GCB_OTHER; 55edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project else 56edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); 57edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 58edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 59edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// Returns true for all characters whose IndicSyllabicCategory is Pure_Killer. 60edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// From http://www.unicode.org/Public/8.0.0/ucd/IndicSyllabicCategory.txt 61edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectbool isPureKiller(uint32_t c) { 62edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return (c == 0x0E3A || c == 0x0E4E || c == 0x0F84 || c == 0x103A || c == 0x1714 || c == 0x1734 63edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x17D1 || c == 0x1BAA || c == 0x1BF2 || c == 0x1BF3 || c == 0xA806 64edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0xA953 || c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B); 65edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 66edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 67edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// Returns true if the character appears before or after zwj in a zwj emoji sequence. See 68edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html 69edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectbool isZwjEmoji(uint32_t c) { 70edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return (c == 0x2764 // HEAVY BLACK HEART 71edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F468 // MAN 72edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F469 // WOMAN 73edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F48B // KISS MARK 74edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F466 // BOY 75edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F467 // GIRL 76edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F441 // EYE 77edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F5E8); // LEFT SPEECH BUBBLE 78edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 79edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 80edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt 81edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectbool isEmojiModifier(uint32_t c) { 82edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return (0x1F3FB <= c && c <= 0x1F3FF); 83edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 84edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 85edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// Based on Emoji_Modifier_Base from 86edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt 87edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectbool isEmojiBase(uint32_t c) { 88edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (0x261D <= c && c <= 0x270D) { 89edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D)); 90edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } else if (0x1F385 <= c && c <= 0x1F93E) { 91edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return (c == 0x1F385 92edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F3C3 <= c || c <= 0x1F3C4) 93edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F3CA <= c || c <= 0x1F3CB) 94edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F442 <= c || c <= 0x1F443) 95edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F446 <= c || c <= 0x1F450) 96edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F466 <= c || c <= 0x1F469) 97edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F46E 98edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F470 <= c || c <= 0x1F478) 99edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F47C 100edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F481 <= c || c <= 0x1F483) 101edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F485 <= c || c <= 0x1F487) 102edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F4AA 103edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F575 104edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F57A 105edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F590 106edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F595 <= c || c <= 0x1F596) 107edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F645 <= c || c <= 0x1F647) 108edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F64B <= c || c <= 0x1F64F) 109edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F6A3 110edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F6B4 <= c || c <= 0x1F6B6) 111edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F6C0 112edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F918 <= c || c <= 0x1F91E) 113edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F926 114edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || c == 0x1F930 115edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F933 <= c || c <= 0x1F939) 116edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project || (0x1F93B <= c || c <= 0x1F93E)); 117edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } else { 118edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 119edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 120edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 121edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 122edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectbool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, 123edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project size_t offset) { 124edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // This implementation closely follows Unicode Standard Annex #29 on 125edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), 126edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // implementing a tailored version of extended grapheme clusters. 127edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. 128edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 129edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB1, sot ÷; Rule GB2, ÷ eot 130edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (offset <= start || offset >= start + count) { 131edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return true; 132edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 133edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (U16_IS_TRAIL(buf[offset])) { 134edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break 135edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return !U16_IS_LEAD(buf[offset - 1]); 136edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 137edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project uint32_t c1 = 0; 138edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project uint32_t c2 = 0; 139edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project size_t offset_back = offset; 140edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project U16_PREV(buf, start, offset_back, c1); 141edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project U16_NEXT(buf, offset, start + count, c2); 142edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project int32_t p1 = tailoredGraphemeClusterBreak(c1); 143edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project int32_t p2 = tailoredGraphemeClusterBreak(c2); 144edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB3, CR x LF 145edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (p1 == U_GCB_CR && p2 == U_GCB_LF) { 146edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 147edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 148edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB4, (Control | CR | LF) ÷ 149edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { 150edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return true; 151edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 152edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB5, ÷ (Control | CR | LF) 153edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { 154edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return true; 155edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 156edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB6, L x ( L | V | LV | LVT ) 157edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { 158edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 159edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 160edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB7, ( LV | V ) x ( V | T ) 161edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { 162edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 163edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 164edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB8, ( LVT | T ) x T 165edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) { 166edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 167edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 168edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB8a, Regional_Indicator x Regional_Indicator 169edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // 170edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Known limitation: This is overly conservative, and returns no grapheme breaks between two 171edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // flags, such as in the character sequence "U+1F1FA U+1F1F8 [potential break] U+1F1FA U+1F1F8". 172edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Also, it assumes that all combinations of Regional Indicators produce a flag, where they 173edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // don't. 174edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // 175edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // There is no easy solution for doing this correctly, except for querying the font and doing 176edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // some lookback. 177edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { 178edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 179edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 180edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x 181edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) { 182edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 183edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 184edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Cluster indic syllables together (tailoring of UAX #29) 185edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Known limitation: this is overly conservative, and assumes that the virama may form a 186edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // conjunct with the following letter, which doesn't always happen. 187edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // 188edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // There is no easy solution to do this correctly. Even querying the font does not help (with 189edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // the current font technoloies), since the font may be creating the conjunct using multiple 190edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // glyphs, while the user may be perceiving that sequence of glyphs as one conjunct or one 191edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // letter. 192edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama 193edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project && !isPureKiller(c1) 194edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { 195edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 196edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 197edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Tailoring: make emoji sequences with ZWJ a single grapheme cluster 198edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) { 199edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // look at character before ZWJ to see that both can participate in an emoji zwj sequence 200edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project uint32_t c0 = 0; 201edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project U16_PREV(buf, start, offset_back, c0); 202edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (c0 == 0xFE0F && offset_back > start) { 203edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // skip over emoji variation selector 204edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project U16_PREV(buf, start, offset_back, c0); 205edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 206edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (isZwjEmoji(c0)) { 207edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 208edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 209edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 210edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf 211edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // E_Base x E_Modifier 212edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (isEmojiModifier(c2)) { 213edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (c1 == 0xFE0F && offset_back > start) { 214edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // skip over emoji variation selector 215edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project U16_PREV(buf, start, offset_back, c1); 216edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 217edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (isEmojiBase(c1)) { 218edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return false; 219edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 220edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 221edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // Rule GB10, Any ÷ Any 222edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return true; 223edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 224edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 225edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Projectsize_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count, 226edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project size_t offset, MoveOpt opt) { 227edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project switch (opt) { 228edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project case AFTER: 229edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (offset < start + count) { 230edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project offset++; 231edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 232edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // fall through 233edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project case AT_OR_AFTER: 234edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project while (!isGraphemeBreak(buf, start, count, offset)) { 235edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project offset++; 236edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 237edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project break; 238edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project case BEFORE: 239edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (offset > start) { 240edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project offset--; 241edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 242edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project // fall through 243edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project case AT_OR_BEFORE: 244edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project while (!isGraphemeBreak(buf, start, count, offset)) { 245edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project offset--; 246edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 247edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project break; 248edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project case AT: 249edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project if (!isGraphemeBreak(buf, start, count, offset)) { 250edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project offset = (size_t)-1; 251edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 252edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project break; 253edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project } 254edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project return offset; 255edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} 256edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project 257edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project} // namespace android 258edbf3b6af777b721cd2a1ef461947e51e88241e1The Android Open Source Project