GraphemeBreak.cpp revision 450e96c8170c3d59a5896e734c90d3f9def505f8
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <stdint.h>
18450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka#include <algorithm>
193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/uchar.h>
203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/utf16.h>
213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <minikin/GraphemeBreak.h>
230036da164e3b25f1ac29c840c1fe15b03dc6677fSeigo Nonaka#include "MinikinInternal.h"
243d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Leviennamespace android {
263d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
27c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournaderint32_t tailoredGraphemeClusterBreak(uint32_t c) {
28c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // Characters defined as Control that we want to treat them as Extend.
29c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // These are curated manually.
30c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    if (c == 0x00AD                         // SHY
31c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x061C                  // ALM
32c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x180E                  // MONGOLIAN VOWEL SEPARATOR
33c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x200B                  // ZWSP
34c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x200E                  // LRM
35c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x200F                  // RLM
36c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO
37c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || ((c | 0xF) == 0x206F)        // WJ, invisible math operators, LRI, RLI, FSI, PDI,
38c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader                                            // and the deprecated invisible format controls
39c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0xFEFF                  // BOM
40c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || ((c | 0x7F) == 0xE007F))     // recently undeprecated tag characters in Plane 14
41c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return U_GCB_EXTEND;
42c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // UTC-approved characters for the Prepend class, per
43c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // http://www.unicode.org/L2/L2015/15183r-graph-cluster-brk.txt
44c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // These should be removed when our copy of ICU gets updated to Unicode 9.0 (~2016 or 2017).
45c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    else if ((0x0600 <= c && c <= 0x0605) // Arabic subtending marks
46c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x06DD                // ARABIC SUBTENDING MARK
47c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x070F                // SYRIAC ABBREVIATION MARK
48c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x0D4E                // MALAYALAM LETTER DOT REPH
49c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x110BD               // KAITHI NUMBER SIGN
50c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x111C2               // SHARADA SIGN JIHVAMULIYA
51c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            || c == 0x111C3)              // SHARADA SIGN UPADHMANIYA
52c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return U_GCB_PREPEND;
53c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they
54c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // allow a grapheme break before it.
55c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    else if (c == 0x0E33)
56c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return U_GCB_OTHER;
57c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    else
58c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader        return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
59c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader}
60c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader
// Reports whether c is one of the characters whose IndicSyllabicCategory is Pure_Killer.
// The list comes from http://www.unicode.org/Public/8.0.0/ucd/IndicSyllabicCategory.txt
bool isPureKiller(uint32_t c) {
    switch (c) {
        case 0x0E3A:
        case 0x0E4E:
        case 0x0F84:
        case 0x103A:
        case 0x1714:
        case 0x1734:
        case 0x17D1:
        case 0x1BAA:
        case 0x1BF2:
        case 0x1BF3:
        case 0xA806:
        case 0xA953:
        case 0xABED:
        case 0x11134:
        case 0x112EA:
        case 0x1172B:
            return true;
        default:
            return false;
    }
}
68c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader
// Reports whether c is a character that can appear directly before or after a ZWJ in a
// zwj emoji sequence. See http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
bool isZwjEmoji(uint32_t c) {
    switch (c) {
        case 0x2764:    // HEAVY BLACK HEART
        case 0x1F468:   // MAN
        case 0x1F469:   // WOMAN
        case 0x1F48B:   // KISS MARK
        case 0x1F466:   // BOY
        case 0x1F467:   // GIRL
        case 0x1F441:   // EYE
        case 0x1F5E8:   // LEFT SPEECH BUBBLE
            return true;
        default:
            return false;
    }
}
816638e05ac2de397455c30cae05aca399a567428dRaph Levien
823d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levienbool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
833d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        size_t offset) {
843d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // This implementation closely follows Unicode Standard Annex #29 on
853d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
863d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // implementing a tailored version of extended grapheme clusters.
873d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.
883d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
89d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB1, sot ÷; Rule GB2, ÷ eot
903d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (offset <= start || offset >= start + count) {
913d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return true;
923d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
933d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (U16_IS_TRAIL(buf[offset])) {
94d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien        // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break
95d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien        return !U16_IS_LEAD(buf[offset - 1]);
963d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
973d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    uint32_t c1 = 0;
983d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    uint32_t c2 = 0;
993d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    size_t offset_back = offset;
1003d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    U16_PREV(buf, start, offset_back, c1);
10140beb7744a61248de82a6077996c83c14e0122c2Raph Levien    U16_NEXT(buf, offset, start + count, c2);
102c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    int32_t p1 = tailoredGraphemeClusterBreak(c1);
103c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    int32_t p2 = tailoredGraphemeClusterBreak(c2);
1043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB3, CR x LF
1053d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
1063d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1073d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
108d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB4, (Control | CR | LF) ÷
1093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
1103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return true;
1113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
112d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB5, ÷ (Control | CR | LF)
1133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
1143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return true;
1153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
1163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB6, L x ( L | V | LV | LVT )
1173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
1183d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
1203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB7, ( LV | V ) x ( V | T )
1213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
1223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1233d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
1243d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    // Rule GB8, ( LVT | T ) x T
125d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
1263d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1273d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
128450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    // Rule GB8a that looks at even-off cases.
129c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    //
130450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    // sot   (RI RI)*  RI x RI
131450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    // [^RI] (RI RI)*  RI x RI
132450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka    //                 RI ÷ RI
1333d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
134450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        // Look at up to 1000 code units.
135450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        start = std::max((ssize_t)start, (ssize_t)offset_back - 1000);
136450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        while (offset_back > start) {
137450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka            U16_PREV(buf, start, offset_back, c1);
138450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka            if (tailoredGraphemeClusterBreak(c1) != U_GCB_REGIONAL_INDICATOR) {
139450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka                offset_back += U16_LENGTH(c1);
140450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka                break;
141450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka            }
142450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        }
143450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka
144450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        // Note that the offset has moved forwared 2 code units by U16_NEXT.
145450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        // The number 4 comes from the number of code units in a whole flag.
146450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka        return (offset - 2 - offset_back) % 4 == 0;
1473d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
148c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x
149c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) {
1503d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1513d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
15240beb7744a61248de82a6077996c83c14e0122c2Raph Levien    // Cluster indic syllables together (tailoring of UAX #29)
153c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // Known limitation: this is overly conservative, and assumes that the virama may form a
154c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // conjunct with the following letter, which doesn't always happen.
155c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    //
156c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // There is no easy solution to do this correctly. Even querying the font does not help (with
157c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // the current font technoloies), since the font may be creating the conjunct using multiple
158c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // glyphs, while the user may be perceiving that sequence of glyphs as one conjunct or one
159c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader    // letter.
1603d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
161c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader            && !isPureKiller(c1)
1623d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
1633d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        return false;
1643d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
1656638e05ac2de397455c30cae05aca399a567428dRaph Levien    // Tailoring: make emoji sequences with ZWJ a single grapheme cluster
1666638e05ac2de397455c30cae05aca399a567428dRaph Levien    if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) {
1676638e05ac2de397455c30cae05aca399a567428dRaph Levien        // look at character before ZWJ to see that both can participate in an emoji zwj sequence
1686638e05ac2de397455c30cae05aca399a567428dRaph Levien        uint32_t c0 = 0;
1696638e05ac2de397455c30cae05aca399a567428dRaph Levien        U16_PREV(buf, start, offset_back, c0);
1706638e05ac2de397455c30cae05aca399a567428dRaph Levien        if (c0 == 0xFE0F && offset_back > start) {
1716638e05ac2de397455c30cae05aca399a567428dRaph Levien            // skip over emoji variation selector
1726638e05ac2de397455c30cae05aca399a567428dRaph Levien            U16_PREV(buf, start, offset_back, c0);
1736638e05ac2de397455c30cae05aca399a567428dRaph Levien        }
1746638e05ac2de397455c30cae05aca399a567428dRaph Levien        if (isZwjEmoji(c0)) {
1756638e05ac2de397455c30cae05aca399a567428dRaph Levien            return false;
1766638e05ac2de397455c30cae05aca399a567428dRaph Levien        }
1776638e05ac2de397455c30cae05aca399a567428dRaph Levien    }
178adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
179adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    // E_Base x E_Modifier
180adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    if (isEmojiModifier(c2)) {
181adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien        if (c1 == 0xFE0F && offset_back > start) {
182adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien            // skip over emoji variation selector
183adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien            U16_PREV(buf, start, offset_back, c1);
184adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien        }
185adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien        if (isEmojiBase(c1)) {
186adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien            return false;
187adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien        }
188adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien    }
189d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien    // Rule GB10, Any ÷ Any
1903d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    return true;
1913d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien}
1923d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
1933d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Leviensize_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
1943d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        size_t offset, MoveOpt opt) {
1953d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    switch (opt) {
1963d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AFTER:
1973d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        if (offset < start + count) {
1983d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset++;
1993d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2003d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        // fall through
2013d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AT_OR_AFTER:
2023d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        while (!isGraphemeBreak(buf, start, count, offset)) {
2033d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset++;
2043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2053d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        break;
2063d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case BEFORE:
2073d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        if (offset > start) {
2083d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset--;
2093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        // fall through
2113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AT_OR_BEFORE:
2123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        while (!isGraphemeBreak(buf, start, count, offset)) {
2133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset--;
2143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        break;
2163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    case AT:
2173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        if (!isGraphemeBreak(buf, start, count, offset)) {
2183d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien            offset = (size_t)-1;
2193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        }
2203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien        break;
2213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    }
2223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien    return offset;
2233d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien}
2243d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien
2253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien}  // namespace android
226