13d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien/* 23d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Copyright (C) 2014 The Android Open Source Project 33d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * 43d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Licensed under the Apache License, Version 2.0 (the "License"); 53d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * you may not use this file except in compliance with the License. 63d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * You may obtain a copy of the License at 73d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * 83d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * http://www.apache.org/licenses/LICENSE-2.0 93d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * 103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Unless required by applicable law or agreed to in writing, software 113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * distributed under the License is distributed on an "AS IS" BASIS, 123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * See the License for the specific language governing permissions and 143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * limitations under the License. 153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien */ 163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <stdint.h> 18450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka#include <algorithm> 193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/uchar.h> 203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/utf16.h> 213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <minikin/GraphemeBreak.h> 23bab3b98ceb29fa3fc5d8832284312859d7f32cc7Roozbeh Pournader#include <minikin/Emoji.h> 240036da164e3b25f1ac29c840c1fe15b03dc6677fSeigo Nonaka#include "MinikinInternal.h" 253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 2614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin { 273d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 28c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournaderint32_t tailoredGraphemeClusterBreak(uint32_t c) { 29c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // Characters defined as Control that we want to treat them as Extend. 30c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // These are curated manually. 31c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader if (c == 0x00AD // SHY 32c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x061C // ALM 33c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x180E // MONGOLIAN VOWEL SEPARATOR 34c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x200B // ZWSP 35c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x200E // LRM 36c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x200F // RLM 37c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO 38c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || ((c | 0xF) == 0x206F) // WJ, invisible math operators, LRI, RLI, FSI, PDI, 39c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // and the deprecated invisible format controls 40c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0xFEFF // BOM 41c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || ((c | 0x7F) == 0xE007F)) // recently undeprecated tag characters in Plane 14 42c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return U_GCB_EXTEND; 43c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they 44c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // allow a grapheme break before it. 45c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader else if (c == 0x0E33) 46c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return U_GCB_OTHER; 47c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader else 48c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); 49c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader} 50c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader 51c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader// Returns true for all characters whose IndicSyllabicCategory is Pure_Killer. 528a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka// From http://www.unicode.org/Public/9.0.0/ucd/IndicSyllabicCategory.txt 53c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournaderbool isPureKiller(uint32_t c) { 54c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return (c == 0x0E3A || c == 0x0E4E || c == 0x0F84 || c == 0x103A || c == 0x1714 || c == 0x1734 55c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x17D1 || c == 0x1BAA || c == 0x1BF2 || c == 0x1BF3 || c == 0xA806 56c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0xA953 || c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B); 57c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader} 58c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader 59b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournaderbool GraphemeBreak::isGraphemeBreak(const float* advances, const uint16_t* buf, size_t start, 60b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader size_t count, const size_t offset) { 613d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // This implementation closely follows Unicode Standard Annex #29 on 623d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), 633d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // implementing a tailored version of extended grapheme clusters. 643d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. 653d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 66d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB1, sot ÷; Rule GB2, ÷ eot 673d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (offset <= start || offset >= start + count) { 683d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 693d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 703d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (U16_IS_TRAIL(buf[offset])) { 71d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break 72d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien return !U16_IS_LEAD(buf[offset - 1]); 733d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 743d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien uint32_t c1 = 0; 753d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien uint32_t c2 = 0; 763d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien size_t offset_back = offset; 77b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader size_t offset_forward = offset; 783d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien U16_PREV(buf, start, offset_back, c1); 79b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader U16_NEXT(buf, offset_forward, start + count, c2); 80c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader int32_t p1 = tailoredGraphemeClusterBreak(c1); 81c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader int32_t p2 = tailoredGraphemeClusterBreak(c2); 823d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB3, CR x LF 833d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_CR && p2 == U_GCB_LF) { 843d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 853d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 86d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB4, (Control | CR | LF) ÷ 873d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { 883d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 893d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 90d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB5, ÷ (Control | CR | LF) 913d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { 923d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 933d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 943d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB6, L x ( L | V | LV | LVT ) 953d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { 963d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 973d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 983d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB7, ( LV | V ) x ( V | T ) 993d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { 1003d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1013d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 1023d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB8, ( LVT | T ) x T 103d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) { 1043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1053d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 1068a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka // Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x 1078a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka if (p2 == U_GCB_EXTEND || p2 == U_GCB_ZWJ || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) { 1083d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 11093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader 11193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // This is used to decide font-dependent grapheme clusters. If we don't have the advance 11293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // information, we become conservative in grapheme breaking and assume that it has no advance. 11393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader const bool c2_has_advance = (advances != nullptr && advances[offset - start] != 0.0); 11493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader 11593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // All the following rules are font-dependent, in the way that if we know c2 has an advance, 11693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // we definitely know that it cannot form a grapheme with the character(s) before it. So we 11793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // make the decision in favor a grapheme break early. 11893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (c2_has_advance) { 11993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader return true; 1203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 12193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader 12293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Note: For Rule GB10 and GB11 below, we do not use the Unicode line breaking properties for 12393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // determining emoji-ness and carry our own data, because our data could be more fresh than what 12493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // ICU provides. 12593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // 12693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Tailored version of Rule GB10, (E_Base | EBG) Extend* × E_Modifier. 12793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // The rule itself says do not break between emoji base and emoji modifiers, skipping all Extend 12893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // characters. Variation selectors are considered Extend, so they are handled fine. 12993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // 13093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // We tailor this by requiring that an actual ligature is formed. If the font doesn't form a 13193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // ligature, we allow a break before the modifier. 13293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (isEmojiModifier(c2)) { 13393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader uint32_t c0 = c1; 13493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader size_t offset_backback = offset_back; 13593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader int32_t p0 = p1; 13693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (p0 == U_GCB_EXTEND && offset_backback > start) { 13793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // skip over emoji variation selector 13893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader U16_PREV(buf, start, offset_backback, c0); 13993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader p0 = tailoredGraphemeClusterBreak(c0); 14093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } 14193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (isEmojiBase(c0)) { 14293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader return false; 14393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } 14493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } 14593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG) 14693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // We try to make emoji sequences with ZWJ a single grapheme cluster, but only if they actually 14793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // merge to one cluster. So we are more relaxed than the UAX #29 rules in accepting any emoji 14893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // character after the ZWJ, but are tighter in that we only treat it as one cluster if a 14993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // ligature is actually formed and we also require the character before the ZWJ to also be an 15093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // emoji. 1518a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka if (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) { 1526638e05ac2de397455c30cae05aca399a567428dRaph Levien // look at character before ZWJ to see that both can participate in an emoji zwj sequence 1536638e05ac2de397455c30cae05aca399a567428dRaph Levien uint32_t c0 = 0; 15493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader size_t offset_backback = offset_back; 15593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader U16_PREV(buf, start, offset_backback, c0); 15693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (c0 == 0xFE0F && offset_backback > start) { 1576638e05ac2de397455c30cae05aca399a567428dRaph Levien // skip over emoji variation selector 15893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader U16_PREV(buf, start, offset_backback, c0); 1596638e05ac2de397455c30cae05aca399a567428dRaph Levien } 16077f488345316fba46c271fc04bea470819ae1712Seigo Nonaka if (isEmoji(c0)) { 1616638e05ac2de397455c30cae05aca399a567428dRaph Levien return false; 1626638e05ac2de397455c30cae05aca399a567428dRaph Levien } 1636638e05ac2de397455c30cae05aca399a567428dRaph Levien } 16493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases. 16593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // sot (RI RI)* RI x RI 16693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // [^RI] (RI RI)* RI x RI 16793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // 16893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // If we have font information, we have already broken the cluster if and only if the second 16993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // character had no advance, which means a ligature was formed. If we don't, we look back like 17093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // UAX #29 recommends, but only up to 1000 code units. 17193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { 17293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (advances != nullptr) { 17393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // We have advances information. But if we are here, we already know c2 has no advance. 17493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // So we should definitely disallow a break. 175adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien return false; 17693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } else { 17793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Look at up to 1000 code units. 17893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader const size_t lookback_barrier = std::max((ssize_t)start, (ssize_t)offset_back - 1000); 17993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader size_t offset_backback = offset_back; 18093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader while (offset_backback > lookback_barrier) { 18193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader uint32_t c0 = 0; 18293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader U16_PREV(buf, lookback_barrier, offset_backback, c0); 18393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (tailoredGraphemeClusterBreak(c0) != U_GCB_REGIONAL_INDICATOR) { 18493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader offset_backback += U16_LENGTH(c0); 18593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader break; 18693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } 18793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } 18893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // The number 4 comes from the number of code units in a whole flag. 18993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader return (offset - offset_backback) % 4 == 0; 190adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien } 191adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien } 19293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Cluster Indic syllables together (tailoring of UAX #29). 19393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Immediately after each virama (that is not just a pure killer) followed by a letter, we 19493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // disallow grapheme breaks (if we are here, we don't know about advances, or we already know 19593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // that c2 has no advance). 19693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama 19793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader && !isPureKiller(c1) 19893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { 19993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader return false; 20093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader } 20193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader // Rule GB999, Any ÷ Any 2023d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 2033d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien} 2043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 205b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournadersize_t GraphemeBreak::getTextRunCursor(const float* advances, const uint16_t* buf, size_t start, 206b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader size_t count, size_t offset, MoveOpt opt) { 2073d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien switch (opt) { 2083d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AFTER: 2093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (offset < start + count) { 2103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset++; 2113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // fall through 2133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AT_OR_AFTER: 214b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader while (!isGraphemeBreak(advances, buf, start, count, offset)) { 2153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset++; 2163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien break; 2183d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case BEFORE: 2193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (offset > start) { 2203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset--; 2213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // fall through 2233d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AT_OR_BEFORE: 224b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader while (!isGraphemeBreak(advances, buf, start, count, offset)) { 2253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset--; 2263d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2273d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien break; 2283d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AT: 229b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader if (!isGraphemeBreak(advances, buf, start, count, offset)) { 2303d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset = (size_t)-1; 2313d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2323d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien break; 2333d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2343d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return offset; 2353d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien} 2363d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 23714e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka} // namespace minikin 238