/* hb-utf-private.hh revision e634fed4285ce440d277345727ed01757df6d779 */
1/* 2 * Copyright © 2011,2012,2014 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27#ifndef HB_UTF_PRIVATE_HH 28#define HB_UTF_PRIVATE_HH 29 30#include "hb-private.hh" 31 32 33/* UTF-8 */ 34 35static inline const uint8_t * 36hb_utf_next (const uint8_t *text, 37 const uint8_t *end, 38 hb_codepoint_t *unicode) 39{ 40 /* Written to only accept well-formed sequences. 41 * Based on ideas from ICU's U8_NEXT. 42 * Generates a -1 for each ill-formed byte. 
*/ 43 44 hb_codepoint_t c = *text++; 45 46 if (c > 0x7Fu) 47 { 48 if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */ 49 { 50 unsigned int t1; 51 if (likely (text < end && 52 (t1 = text[0] - 0x80u) <= 0x3Fu)) 53 { 54 c = ((c&0x1Fu)<<6) | t1; 55 text++; 56 } 57 else 58 goto error; 59 } 60 else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */ 61 { 62 unsigned int t1, t2; 63 if (likely (1 < end - text && 64 (t1 = text[0] - 0x80u) <= 0x3Fu && 65 (t2 = text[1] - 0x80u) <= 0x3Fu)) 66 { 67 c = ((c&0xFu)<<12) | (t1<<6) | t2; 68 if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu))) 69 goto error; 70 text += 2; 71 } 72 else 73 goto error; 74 } 75 else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */ 76 { 77 unsigned int t1, t2, t3; 78 if (likely (2 < end - text && 79 (t1 = text[0] - 0x80u) <= 0x3Fu && 80 (t2 = text[1] - 0x80u) <= 0x3Fu && 81 (t3 = text[2] - 0x80u) <= 0x3Fu)) 82 { 83 c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; 84 if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu))) 85 goto error; 86 text += 3; 87 } 88 else 89 goto error; 90 } 91 else 92 goto error; 93 } 94 95 *unicode = c; 96 return text; 97 98error: 99 *unicode = -1; 100 return text; 101} 102 103static inline const uint8_t * 104hb_utf_prev (const uint8_t *text, 105 const uint8_t *start, 106 hb_codepoint_t *unicode) 107{ 108 const uint8_t *end = text--; 109 while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) 110 text--; 111 112 if (likely (hb_utf_next (text, end, unicode) == end)) 113 return text; 114 115 *unicode = -1; 116 return end - 1; 117} 118 119 120static inline unsigned int 121hb_utf_strlen (const uint8_t *text) 122{ 123 return strlen ((const char *) text); 124} 125 126 127/* UTF-16 */ 128 129static inline const uint16_t * 130hb_utf_next (const uint16_t *text, 131 const uint16_t *end, 132 hb_codepoint_t *unicode) 133{ 134 hb_codepoint_t c = *text++; 135 136 if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) 137 { 138 *unicode = c; 139 return text; 140 } 141 142 if (likely 
(hb_in_range (c, 0xD800u, 0xDBFFu))) 143 { 144 /* High-surrogate in c */ 145 hb_codepoint_t l; 146 if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu)))) 147 { 148 /* Low-surrogate in l */ 149 *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); 150 text++; 151 return text; 152 } 153 } 154 155 /* Lonely / out-of-order surrogate. */ 156 *unicode = -1; 157 return text; 158} 159 160static inline const uint16_t * 161hb_utf_prev (const uint16_t *text, 162 const uint16_t *start, 163 hb_codepoint_t *unicode) 164{ 165 const uint16_t *end = text--; 166 hb_codepoint_t c = *text; 167 168 if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) 169 { 170 *unicode = c; 171 return text; 172 } 173 174 if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu))) 175 text--; 176 177 if (likely (hb_utf_next (text, end, unicode) == end)) 178 return text; 179 180 *unicode = -1; 181 return end - 1; 182} 183 184 185static inline unsigned int 186hb_utf_strlen (const uint16_t *text) 187{ 188 unsigned int l = 0; 189 while (*text++) l++; 190 return l; 191} 192 193 194/* UTF-32 */ 195 196static inline const uint32_t * 197hb_utf_next (const uint32_t *text, 198 const uint32_t *end HB_UNUSED, 199 hb_codepoint_t *unicode) 200{ 201 hb_codepoint_t c = *text++; 202 if (unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) 203 goto error; 204 *unicode = c; 205 return text; 206 207error: 208 *unicode = -1; 209 return text; 210} 211 212static inline const uint32_t * 213hb_utf_prev (const uint32_t *text, 214 const uint32_t *start HB_UNUSED, 215 hb_codepoint_t *unicode) 216{ 217 hb_utf_next (text - 1, text, unicode); 218 return text - 1; 219} 220 221static inline unsigned int 222hb_utf_strlen (const uint32_t *text) 223{ 224 unsigned int l = 0; 225 while (*text++) l++; 226 return l; 227} 228 229 230#endif /* HB_UTF_PRIVATE_HH */ 231