1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <stdint.h> 18#include <unicode/uchar.h> 19#include <unicode/utf16.h> 20 21#include <minikin/GraphemeBreak.h> 22 23namespace android { 24 25bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, 26 size_t offset) { 27 // This implementation closely follows Unicode Standard Annex #29 on 28 // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), 29 // implementing a tailored version of extended grapheme clusters. 30 // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. 31 32 // Rule GB1, sot /; Rule GB2, / eot 33 if (offset <= start || offset >= start + count) { 34 return true; 35 } 36 if (U16_IS_TRAIL(buf[offset])) { 37 // Don't break a surrogate pair 38 return false; 39 } 40 uint32_t c1 = 0; 41 uint32_t c2 = 0; 42 size_t offset_back = offset; 43 U16_PREV(buf, start, offset_back, c1); 44 U16_NEXT(buf, offset, count, c2); 45 int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK); 46 int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK); 47 // Rule GB3, CR x LF 48 if (p1 == U_GCB_CR && p2 == U_GCB_LF) { 49 return false; 50 } 51 // Rule GB4, (Control | CR | LF) / 52 if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { 53 return true; 54 } 55 // Rule GB5, / (Control | CR | LF) 56 if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { 57 // exclude zero-width control characters from breaking (tailoring of TR29) 58 if (c2 == 0x00ad 59 || (c2 >= 0x200b && c2 <= 0x200f) 60 || (c2 >= 0x2028 && c2 <= 0x202e) 61 || (c2 >= 0x2060 && c2 <= 0x206f)) { 62 return false; 63 } 64 return true; 65 } 66 // Rule GB6, L x ( L | V | LV | LVT ) 67 if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { 68 return false; 69 } 70 // Rule GB7, ( LV | V ) x ( V | T ) 71 if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { 72 return false; 73 } 74 // Rule GB8, ( LVT | T ) x T 75 if ((p1 == U_GCB_L || p1 == U_GCB_T) && p2 == U_GCB_T) { 76 return false; 77 } 78 // Rule GB8a, Regional_Indicator x Regional_Indicator 79 if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { 80 return false; 81 } 82 // Rule GB9, x Extend; Rule GB9a, x SpacingMark 83 if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) { 84 if (c2 == 0xe33) { 85 // most other implementations break THAI CHARACTER SARA AM 86 // (tailoring of TR29) 87 return true; 88 } 89 return false; 90 } 91 // Cluster indic syllables togeter (tailoring of TR29) 92 if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama 93 && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { 94 return false; 95 } 96 // Rule GB10, Any / Any 97 return true; 98} 99 100size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count, 101 size_t offset, MoveOpt opt) { 102 switch (opt) { 103 case AFTER: 104 if (offset < start + count) { 105 offset++; 106 } 107 // fall through 108 case AT_OR_AFTER: 109 while (!isGraphemeBreak(buf, start, count, offset)) { 110 offset++; 111 } 112 break; 113 case BEFORE: 114 if (offset > start) { 115 offset--; 116 } 117 // fall through 118 case AT_OR_BEFORE: 119 while (!isGraphemeBreak(buf, start, count, offset)) { 120 offset--; 121 } 122 break; 123 case AT: 124 if (!isGraphemeBreak(buf, start, count, offset)) { 125 offset = (size_t)-1; 126 } 127 break; 128 } 129 return offset; 130} 131 132} // namespace android 133