GraphemeBreak.cpp revision 450e96c8170c3d59a5896e734c90d3f9def505f8
13d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien/* 23d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Copyright (C) 2014 The Android Open Source Project 33d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * 43d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Licensed under the Apache License, Version 2.0 (the "License"); 53d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * you may not use this file except in compliance with the License. 63d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * You may obtain a copy of the License at 73d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * 83d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * http://www.apache.org/licenses/LICENSE-2.0 93d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * 103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * Unless required by applicable law or agreed to in writing, software 113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * distributed under the License is distributed on an "AS IS" BASIS, 123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * See the License for the specific language governing permissions and 143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien * limitations under the License. 153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien */ 163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <stdint.h> 18450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka#include <algorithm> 193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/uchar.h> 203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <unicode/utf16.h> 213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien#include <minikin/GraphemeBreak.h> 230036da164e3b25f1ac29c840c1fe15b03dc6677fSeigo Nonaka#include "MinikinInternal.h" 243d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Leviennamespace android { 263d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 27c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournaderint32_t tailoredGraphemeClusterBreak(uint32_t c) { 28c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // Characters defined as Control that we want to treat them as Extend. 29c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // These are curated manually. 30c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader if (c == 0x00AD // SHY 31c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x061C // ALM 32c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x180E // MONGOLIAN VOWEL SEPARATOR 33c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x200B // ZWSP 34c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x200E // LRM 35c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x200F // RLM 36c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO 37c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || ((c | 0xF) == 0x206F) // WJ, invisible math operators, LRI, RLI, FSI, PDI, 38c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // and the deprecated invisible format controls 39c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0xFEFF // BOM 40c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || ((c | 0x7F) == 0xE007F)) // recently undeprecated tag characters in Plane 14 41c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return U_GCB_EXTEND; 42c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // UTC-approved characters for the Prepend class, per 43c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // http://www.unicode.org/L2/L2015/15183r-graph-cluster-brk.txt 44c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // These should be removed when our copy of ICU gets updated to Unicode 9.0 (~2016 or 2017). 45c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader else if ((0x0600 <= c && c <= 0x0605) // Arabic subtending marks 46c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x06DD // ARABIC SUBTENDING MARK 47c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x070F // SYRIAC ABBREVIATION MARK 48c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x0D4E // MALAYALAM LETTER DOT REPH 49c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x110BD // KAITHI NUMBER SIGN 50c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x111C2 // SHARADA SIGN JIHVAMULIYA 51c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x111C3) // SHARADA SIGN UPADHMANIYA 52c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return U_GCB_PREPEND; 53c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they 54c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // allow a grapheme break before it. 55c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader else if (c == 0x0E33) 56c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return U_GCB_OTHER; 57c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader else 58c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); 59c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader} 60c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader 61c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader// Returns true for all characters whose IndicSyllabicCategory is Pure_Killer. 62c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader// From http://www.unicode.org/Public/8.0.0/ucd/IndicSyllabicCategory.txt 63c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournaderbool isPureKiller(uint32_t c) { 64c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader return (c == 0x0E3A || c == 0x0E4E || c == 0x0F84 || c == 0x103A || c == 0x1714 || c == 0x1734 65c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0x17D1 || c == 0x1BAA || c == 0x1BF2 || c == 0x1BF3 || c == 0xA806 66c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader || c == 0xA953 || c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B); 67c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader} 68c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader 696638e05ac2de397455c30cae05aca399a567428dRaph Levien// Returns true if the character appears before or after zwj in a zwj emoji sequence. See 706638e05ac2de397455c30cae05aca399a567428dRaph Levien// http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html 716638e05ac2de397455c30cae05aca399a567428dRaph Levienbool isZwjEmoji(uint32_t c) { 726638e05ac2de397455c30cae05aca399a567428dRaph Levien return (c == 0x2764 // HEAVY BLACK HEART 736638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F468 // MAN 746638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F469 // WOMAN 756638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F48B // KISS MARK 766638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F466 // BOY 776638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F467 // GIRL 786638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F441 // EYE 796638e05ac2de397455c30cae05aca399a567428dRaph Levien || c == 0x1F5E8); // LEFT SPEECH BUBBLE 806638e05ac2de397455c30cae05aca399a567428dRaph Levien} 816638e05ac2de397455c30cae05aca399a567428dRaph Levien 823d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levienbool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, 833d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien size_t offset) { 843d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // This implementation closely follows Unicode Standard Annex #29 on 853d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), 863d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // implementing a tailored version of extended grapheme clusters. 873d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. 883d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 89d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB1, sot ÷; Rule GB2, ÷ eot 903d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (offset <= start || offset >= start + count) { 913d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 923d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 933d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (U16_IS_TRAIL(buf[offset])) { 94d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break 95d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien return !U16_IS_LEAD(buf[offset - 1]); 963d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 973d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien uint32_t c1 = 0; 983d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien uint32_t c2 = 0; 993d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien size_t offset_back = offset; 1003d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien U16_PREV(buf, start, offset_back, c1); 10140beb7744a61248de82a6077996c83c14e0122c2Raph Levien U16_NEXT(buf, offset, start + count, c2); 102c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader int32_t p1 = tailoredGraphemeClusterBreak(c1); 103c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader int32_t p2 = tailoredGraphemeClusterBreak(c2); 1043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB3, CR x LF 1053d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_CR && p2 == U_GCB_LF) { 1063d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1073d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 108d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB4, (Control | CR | LF) ÷ 1093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { 1103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 1113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 112d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB5, ÷ (Control | CR | LF) 1133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { 1143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 1153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 1163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB6, L x ( L | V | LV | LVT ) 1173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { 1183d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 1203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB7, ( LV | V ) x ( V | T ) 1213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { 1223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1233d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 1243d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // Rule GB8, ( LVT | T ) x T 125d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) { 1263d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1273d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 128450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // Rule GB8a that looks at even-off cases. 129c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // 130450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // sot (RI RI)* RI x RI 131450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // [^RI] (RI RI)* RI x RI 132450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // RI ÷ RI 1333d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { 134450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // Look at up to 1000 code units. 135450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka start = std::max((ssize_t)start, (ssize_t)offset_back - 1000); 136450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka while (offset_back > start) { 137450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka U16_PREV(buf, start, offset_back, c1); 138450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka if (tailoredGraphemeClusterBreak(c1) != U_GCB_REGIONAL_INDICATOR) { 139450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka offset_back += U16_LENGTH(c1); 140450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka break; 141450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka } 142450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka } 143450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka 144450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // Note that the offset has moved forwared 2 code units by U16_NEXT. 145450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka // The number 4 comes from the number of code units in a whole flag. 146450e96c8170c3d59a5896e734c90d3f9def505f8Seigo Nonaka return (offset - 2 - offset_back) % 4 == 0; 1473d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 148c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // Rule GB9, x Extend; Rule GB9a, x SpacingMark; Rule GB9b, Prepend x 149c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) { 1503d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1513d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 15240beb7744a61248de82a6077996c83c14e0122c2Raph Levien // Cluster indic syllables together (tailoring of UAX #29) 153c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // Known limitation: this is overly conservative, and assumes that the virama may form a 154c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // conjunct with the following letter, which doesn't always happen. 155c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // 156c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // There is no easy solution to do this correctly. Even querying the font does not help (with 157c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // the current font technoloies), since the font may be creating the conjunct using multiple 158c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // glyphs, while the user may be perceiving that sequence of glyphs as one conjunct or one 159c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader // letter. 1603d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama 161c02138bf556739fa9c9212da13aee89aa520c8efRoozbeh Pournader && !isPureKiller(c1) 1623d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { 1633d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return false; 1643d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 1656638e05ac2de397455c30cae05aca399a567428dRaph Levien // Tailoring: make emoji sequences with ZWJ a single grapheme cluster 1666638e05ac2de397455c30cae05aca399a567428dRaph Levien if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) { 1676638e05ac2de397455c30cae05aca399a567428dRaph Levien // look at character before ZWJ to see that both can participate in an emoji zwj sequence 1686638e05ac2de397455c30cae05aca399a567428dRaph Levien uint32_t c0 = 0; 1696638e05ac2de397455c30cae05aca399a567428dRaph Levien U16_PREV(buf, start, offset_back, c0); 1706638e05ac2de397455c30cae05aca399a567428dRaph Levien if (c0 == 0xFE0F && offset_back > start) { 1716638e05ac2de397455c30cae05aca399a567428dRaph Levien // skip over emoji variation selector 1726638e05ac2de397455c30cae05aca399a567428dRaph Levien U16_PREV(buf, start, offset_back, c0); 1736638e05ac2de397455c30cae05aca399a567428dRaph Levien } 1746638e05ac2de397455c30cae05aca399a567428dRaph Levien if (isZwjEmoji(c0)) { 1756638e05ac2de397455c30cae05aca399a567428dRaph Levien return false; 1766638e05ac2de397455c30cae05aca399a567428dRaph Levien } 1776638e05ac2de397455c30cae05aca399a567428dRaph Levien } 178adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf 179adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien // E_Base x E_Modifier 180adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien if (isEmojiModifier(c2)) { 181adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien if (c1 == 0xFE0F && offset_back > start) { 182adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien // skip over emoji variation selector 183adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien U16_PREV(buf, start, offset_back, c1); 184adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien } 185adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien if (isEmojiBase(c1)) { 186adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien return false; 187adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien } 188adfa580f1f067c846509b4346e5be2cb19177c1bRaph Levien } 189d8dd94b81ea7efd776859fbbdf4a76458e270eabRaph Levien // Rule GB10, Any ÷ Any 1903d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return true; 1913d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien} 1923d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 1933d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Leviensize_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count, 1943d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien size_t offset, MoveOpt opt) { 1953d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien switch (opt) { 1963d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AFTER: 1973d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (offset < start + count) { 1983d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset++; 1993d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2003d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // fall through 2013d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AT_OR_AFTER: 2023d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien while (!isGraphemeBreak(buf, start, count, offset)) { 2033d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset++; 2043d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2053d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien break; 2063d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case BEFORE: 2073d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (offset > start) { 2083d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset--; 2093d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2103d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien // fall through 2113d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AT_OR_BEFORE: 2123d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien while (!isGraphemeBreak(buf, start, count, offset)) { 2133d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset--; 2143d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2153d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien break; 2163d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien case AT: 2173d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien if (!isGraphemeBreak(buf, start, count, offset)) { 2183d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien offset = (size_t)-1; 2193d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2203d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien break; 2213d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien } 2223d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien return offset; 2233d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien} 2243d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien 2253d28a3fcebfed4744d1ef0307a8bdc8fc01e364cRaph Levien} // namespace android 226