13d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien/*
23d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Copyright (C) 2014 The Android Open Source Project
33d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien *
43d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Licensed under the Apache License, Version 2.0 (the "License");
53d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * you may not use this file except in compliance with the License.
63d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * You may obtain a copy of the License at
73d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien *
83d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien *      http://www.apache.org/licenses/LICENSE-2.0
93d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien *
103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Unless required by applicable law or agreed to in writing, software
113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * distributed under the License is distributed on an "AS IS" BASIS,
123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * See the License for the specific language governing permissions and
143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * limitations under the License.
153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien */
163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <stdint.h>
18450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka#include <algorithm>
193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/uchar.h>
203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/utf16.h>
213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <minikin/GraphemeBreak.h>
23bab3b98ceb29fa3fc5d8832284312859d7f32cc7Roozbeh Pournader#include <minikin/Emoji.h>
240036da164e3b25f1ac29c840c1fe15b03dc6677fSeigo Nonaka#include "MinikinInternal.h"
253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
2614e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonakanamespace minikin {
273d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
28c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournaderint32_t tailoredGraphemeClusterBreak(uint32_t c) {
29c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // Characters defined as Control that we want to treat them as Extend.
30c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // These are curated manually.
31c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    if (c == 0x00AD                         // SHY
32c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x061C                  // ALM
33c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x180E                  // MONGOLIAN VOWEL SEPARATOR
34c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x200B                  // ZWSP
35c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x200E                  // LRM
36c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x200F                  // RLM
37c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO
38c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || ((c | 0xF) == 0x206F)        // WJ, invisible math operators, LRI, RLI, FSI, PDI,
39c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader                                            // and the deprecated invisible format controls
40c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0xFEFF                  // BOM
41c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || ((c | 0x7F) == 0xE007F))     // recently undeprecated tag characters in Plane 14
42c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return U_GCB_EXTEND;
43c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they
44c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // allow a grapheme break before it.
45c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    else if (c == 0x0E33)
46c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return U_GCB_OTHER;
47c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    else
48c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
49c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader}
50c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader
// Reports whether c is one of the characters whose IndicSyllabicCategory is Pure_Killer.
// The list is taken from http://www.unicode.org/Public/9.0.0/ucd/IndicSyllabicCategory.txt
bool isPureKiller(uint32_t c) {
    switch (c) {
        case 0x0E3A:
        case 0x0E4E:
        case 0x0F84:
        case 0x103A:
        case 0x1714:
        case 0x1734:
        case 0x17D1:
        case 0x1BAA:
        case 0x1BF2:
        case 0x1BF3:
        case 0xA806:
        case 0xA953:
        case 0xABED:
        case 0x11134:
        case 0x112EA:
        case 0x1172B:
            return true;
        default:
            return false;
    }
}
58c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader
59b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournaderbool GraphemeBreak::isGraphemeBreak(const float* advances, const uint16_t* buf, size_t start,
60b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader        size_t count, const size_t offset) {
613d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // This implementation closely follows Unicode Standard Annex #29 on
623d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
633d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // implementing a tailored version of extended grapheme clusters.
643d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.
653d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
66d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB1, sot ÷; Rule GB2, ÷ eot
673d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (offset <= start || offset >= start + count) {
683d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return true;
693d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
703d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (U16_IS_TRAIL(buf[offset])) {
71d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien        // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break
72d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien        return !U16_IS_LEAD(buf[offset - 1]);
733d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
743d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    uint32_t c1 = 0;
753d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    uint32_t c2 = 0;
763d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    size_t offset_back = offset;
77b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader    size_t offset_forward = offset;
783d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    U16_PREV(buf, start, offset_back, c1);
79b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader    U16_NEXT(buf, offset_forward, start + count, c2);
80c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    int32_t p1 = tailoredGraphemeClusterBreak(c1);
81c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    int32_t p2 = tailoredGraphemeClusterBreak(c2);
823d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB3, CR x LF
833d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
843d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
853d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
86d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB4, (Control | CR | LF) ÷
873d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
883d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return true;
893d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
90d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB5, ÷ (Control | CR | LF)
913d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
923d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return true;
933d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
943d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB6, L x ( L | V | LV | LVT )
953d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
963d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
973d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
983d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB7, ( LV | V ) x ( V | T )
993d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
1003d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1013d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
1023d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB8, ( LVT | T ) x T
103d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
1043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1053d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
1068a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka    // Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
1078a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka    if (p2 == U_GCB_EXTEND || p2 == U_GCB_ZWJ || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
1083d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
11093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader
11193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // This is used to decide font-dependent grapheme clusters. If we don't have the advance
11293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // information, we become conservative in grapheme breaking and assume that it has no advance.
11393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    const bool c2_has_advance = (advances != nullptr && advances[offset - start] != 0.0);
11493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader
11593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // All the following rules are font-dependent, in the way that if we know c2 has an advance,
11693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // we definitely know that it cannot form a grapheme with the character(s) before it. So we
11793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // make the decision in favor a grapheme break early.
11893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    if (c2_has_advance) {
11993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        return true;
1203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
12193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader
12293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Note: For Rule GB10 and GB11 below, we do not use the Unicode line breaking properties for
12393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // determining emoji-ness and carry our own data, because our data could be more fresh than what
12493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // ICU provides.
12593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    //
12693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Tailored version of Rule GB10, (E_Base | EBG) Extend* × E_Modifier.
12793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // The rule itself says do not break between emoji base and emoji modifiers, skipping all Extend
12893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // characters. Variation selectors are considered Extend, so they are handled fine.
12993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    //
13093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // We tailor this by requiring that an actual ligature is formed. If the font doesn't form a
13193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // ligature, we allow a break before the modifier.
13293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    if (isEmojiModifier(c2)) {
13393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        uint32_t c0 = c1;
13493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        size_t offset_backback = offset_back;
13593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        int32_t p0 = p1;
13693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        if (p0 == U_GCB_EXTEND && offset_backback > start) {
13793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            // skip over emoji variation selector
13893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            U16_PREV(buf, start, offset_backback, c0);
13993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            p0 = tailoredGraphemeClusterBreak(c0);
14093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        }
14193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        if (isEmojiBase(c0)) {
14293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            return false;
14393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        }
14493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    }
14593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG)
14693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // We try to make emoji sequences with ZWJ a single grapheme cluster, but only if they actually
14793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // merge to one cluster. So we are more relaxed than the UAX #29 rules in accepting any emoji
14893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // character after the ZWJ, but are tighter in that we only treat it as one cluster if a
14993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // ligature is actually formed and we also require the character before the ZWJ to also be an
15093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // emoji.
1518a805cd2651b5d1b55d6af036a40c9b9c63c9d74Seigo Nonaka    if (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) {
1526638e05ac2de397455c30cae05aca399a567428dRaph Levien        // look at character before ZWJ to see that both can participate in an emoji zwj sequence
1536638e05ac2de397455c30cae05aca399a567428dRaph Levien        uint32_t c0 = 0;
15493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        size_t offset_backback = offset_back;
15593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        U16_PREV(buf, start, offset_backback, c0);
15693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        if (c0 == 0xFE0F && offset_backback > start) {
1576638e05ac2de397455c30cae05aca399a567428dRaph Levien            // skip over emoji variation selector
15893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            U16_PREV(buf, start, offset_backback, c0);
1596638e05ac2de397455c30cae05aca399a567428dRaph Levien        }
16077f488345316fba46c271fc04bea470819ae1712Seigo Nonaka        if (isEmoji(c0)) {
1616638e05ac2de397455c30cae05aca399a567428dRaph Levien            return false;
1626638e05ac2de397455c30cae05aca399a567428dRaph Levien        }
1636638e05ac2de397455c30cae05aca399a567428dRaph Levien    }
16493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases.
16593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // sot   (RI RI)*  RI x RI
16693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // [^RI] (RI RI)*  RI x RI
16793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    //
16893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // If we have font information, we have already broken the cluster if and only if the second
16993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // character had no advance, which means a ligature was formed. If we don't, we look back like
17093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // UAX #29 recommends, but only up to 1000 code units.
17193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
17293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        if (advances != nullptr) {
17393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            // We have advances information. But if we are here, we already know c2 has no advance.
17493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            // So we should definitely disallow a break.
175adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien            return false;
17693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        } else {
17793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            // Look at up to 1000 code units.
17893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            const size_t lookback_barrier = std::max((ssize_t)start, (ssize_t)offset_back - 1000);
17993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            size_t offset_backback = offset_back;
18093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            while (offset_backback > lookback_barrier) {
18193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader                uint32_t c0 = 0;
18293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader                U16_PREV(buf, lookback_barrier, offset_backback, c0);
18393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader                if (tailoredGraphemeClusterBreak(c0) != U_GCB_REGIONAL_INDICATOR) {
18493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader                    offset_backback += U16_LENGTH(c0);
18593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader                    break;
18693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader                }
18793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            }
18893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            // The number 4 comes from the number of code units in a whole flag.
18993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            return (offset - offset_backback) % 4 == 0;
190adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien        }
191adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    }
19293e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Cluster Indic syllables together (tailoring of UAX #29).
19393e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Immediately after each virama (that is not just a pure killer) followed by a letter, we
19493e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // disallow grapheme breaks (if we are here, we don't know about advances, or we already know
19593e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // that c2 has no advance).
19693e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
19793e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            && !isPureKiller(c1)
19893e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
19993e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader        return false;
20093e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    }
20193e9c9f71e7cb418287a0acc8d188e385ba11e43Roozbeh Pournader    // Rule GB999, Any ÷ Any
2023d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    return true;
2033d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien}
2043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
205b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournadersize_t GraphemeBreak::getTextRunCursor(const float* advances, const uint16_t* buf, size_t start,
206b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader        size_t count, size_t offset, MoveOpt opt) {
2073d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    switch (opt) {
2083d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AFTER:
2093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        if (offset < start + count) {
2103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset++;
2113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        // fall through
2133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AT_OR_AFTER:
214b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader        while (!isGraphemeBreak(advances, buf, start, count, offset)) {
2153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset++;
2163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        break;
2183d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case BEFORE:
2193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        if (offset > start) {
2203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset--;
2213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        // fall through
2233d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AT_OR_BEFORE:
224b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader        while (!isGraphemeBreak(advances, buf, start, count, offset)) {
2253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset--;
2263d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2273d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        break;
2283d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AT:
229b01028d1d7bc3906ef71c72ad985919f79304b5eRoozbeh Pournader        if (!isGraphemeBreak(advances, buf, start, count, offset)) {
2303d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset = (size_t)-1;
2313d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2323d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        break;
2333d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
2343d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    return offset;
2353d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien}
2363d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
23714e2d136aaef271ba131f917cf5f27baa31ae5adSeigo Nonaka}  // namespace minikin
238