1/* 2 * Copyright © 2011,2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27#ifndef HB_UTF_PRIVATE_HH 28#define HB_UTF_PRIVATE_HH 29 30#include "hb-private.hh" 31 32 33/* UTF-8 */ 34 35#define HB_UTF8_COMPUTE(Char, Mask, Len) \ 36 if (Char < 128) { Len = 1; Mask = 0x7f; } \ 37 else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ 38 else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ 39 else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ 40 else Len = 0; 41 42static inline const uint8_t * 43hb_utf_next (const uint8_t *text, 44 const uint8_t *end, 45 hb_codepoint_t *unicode) 46{ 47 hb_codepoint_t c = *text, mask; 48 unsigned int len; 49 50 /* TODO check for overlong sequences? */ 51 52 HB_UTF8_COMPUTE (c, mask, len); 53 if (unlikely (!len || (unsigned int) (end - text) < len)) { 54 *unicode = -1; 55 return text + 1; 56 } else { 57 hb_codepoint_t result; 58 unsigned int i; 59 result = c & mask; 60 for (i = 1; i < len; i++) 61 { 62 if (unlikely ((text[i] & 0xc0) != 0x80)) 63 { 64 *unicode = -1; 65 return text + 1; 66 } 67 result <<= 6; 68 result |= (text[i] & 0x3f); 69 } 70 *unicode = result; 71 return text + len; 72 } 73} 74 75static inline const uint8_t * 76hb_utf_prev (const uint8_t *text, 77 const uint8_t *start, 78 hb_codepoint_t *unicode) 79{ 80 const uint8_t *end = text--; 81 while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) 82 text--; 83 84 hb_codepoint_t c = *text, mask; 85 unsigned int len; 86 87 /* TODO check for overlong sequences? */ 88 89 HB_UTF8_COMPUTE (c, mask, len); 90 if (unlikely (!len || (unsigned int) (end - text) != len)) { 91 *unicode = -1; 92 return end - 1; 93 } else { 94 hb_codepoint_t result; 95 unsigned int i; 96 result = c & mask; 97 for (i = 1; i < len; i++) 98 { 99 result <<= 6; 100 result |= (text[i] & 0x3f); 101 } 102 *unicode = result; 103 return text; 104 } 105} 106 107 108static inline unsigned int 109hb_utf_strlen (const uint8_t *text) 110{ 111 return strlen ((const char *) text); 112} 113 114 115/* UTF-16 */ 116 117static inline const uint16_t * 118hb_utf_next (const uint16_t *text, 119 const uint16_t *end, 120 hb_codepoint_t *unicode) 121{ 122 hb_codepoint_t c = *text++; 123 124 if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff))) 125 { 126 /* high surrogate */ 127 hb_codepoint_t l; 128 if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff)))) 129 { 130 /* low surrogate */ 131 *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00); 132 text++; 133 } else 134 *unicode = -1; 135 } else 136 *unicode = c; 137 138 return text; 139} 140 141static inline const uint16_t * 142hb_utf_prev (const uint16_t *text, 143 const uint16_t *start, 144 hb_codepoint_t *unicode) 145{ 146 hb_codepoint_t c = *--text; 147 148 if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff))) 149 { 150 /* low surrogate */ 151 hb_codepoint_t h; 152 if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff)))) 153 { 154 /* high surrogate */ 155 *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00); 156 text--; 157 } else 158 *unicode = -1; 159 } else 160 *unicode = c; 161 162 return text; 163} 164 165 166static inline unsigned int 167hb_utf_strlen (const uint16_t *text) 168{ 169 unsigned int l = 0; 170 while (*text++) l++; 171 return l; 172} 173 174 175/* UTF-32 */ 176 177static inline const uint32_t * 178hb_utf_next (const uint32_t *text, 179 const uint32_t *end HB_UNUSED, 180 hb_codepoint_t *unicode) 181{ 182 *unicode = *text++; 183 return text; 184} 185 186static inline const uint32_t * 187hb_utf_prev (const uint32_t *text, 188 const uint32_t *start HB_UNUSED, 189 hb_codepoint_t *unicode) 190{ 191 *unicode = *--text; 192 return text; 193} 194 195static inline unsigned int 196hb_utf_strlen (const uint32_t *text) 197{ 198 unsigned int l = 0; 199 while (*text++) l++; 200 return l; 201} 202 203 204#endif /* HB_UTF_PRIVATE_HH */ 205