1/*
2 * Copyright © 2011,2012  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_UTF_PRIVATE_HH
28#define HB_UTF_PRIVATE_HH
29
30#include "hb-private.hh"
31
32
33/* UTF-8 */
34
/* Classify the lead byte of a UTF-8 sequence.
 *
 * Char: value of the first byte (evaluated more than once, so it must
 *       not have side effects).
 * Mask: receives the bit mask selecting the payload bits of the lead byte.
 * Len:  receives the total sequence length in bytes (1..4), or 0 when
 *       Char cannot start a sequence (a continuation byte or 0xf8..0xff).
 *
 * On the invalid path Mask is set to 0 so that it is never left
 * uninitialized.  The expansion is a single statement, so the macro is
 * safe inside an unbraced if/else; the caller supplies the semicolon. */
#define HB_UTF8_COMPUTE(Char, Mask, Len) \
  do { \
    if ((Char) < 128) { (Len) = 1; (Mask) = 0x7f; } \
    else if (((Char) & 0xe0) == 0xc0) { (Len) = 2; (Mask) = 0x1f; } \
    else if (((Char) & 0xf0) == 0xe0) { (Len) = 3; (Mask) = 0x0f; } \
    else if (((Char) & 0xf8) == 0xf0) { (Len) = 4; (Mask) = 0x07; } \
    else { (Len) = 0; (Mask) = 0; } \
  } while (0)
41
42static inline const uint8_t *
43hb_utf_next (const uint8_t *text,
44	     const uint8_t *end,
45	     hb_codepoint_t *unicode)
46{
47  hb_codepoint_t c = *text, mask;
48  unsigned int len;
49
50  /* TODO check for overlong sequences? */
51
52  HB_UTF8_COMPUTE (c, mask, len);
53  if (unlikely (!len || (unsigned int) (end - text) < len)) {
54    *unicode = -1;
55    return text + 1;
56  } else {
57    hb_codepoint_t result;
58    unsigned int i;
59    result = c & mask;
60    for (i = 1; i < len; i++)
61      {
62	if (unlikely ((text[i] & 0xc0) != 0x80))
63	  {
64	    *unicode = -1;
65	    return text + 1;
66	  }
67	result <<= 6;
68	result |= (text[i] & 0x3f);
69      }
70    *unicode = result;
71    return text + len;
72  }
73}
74
75static inline const uint8_t *
76hb_utf_prev (const uint8_t *text,
77	     const uint8_t *start,
78	     hb_codepoint_t *unicode)
79{
80  const uint8_t *end = text--;
81  while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
82    text--;
83
84  hb_codepoint_t c = *text, mask;
85  unsigned int len;
86
87  /* TODO check for overlong sequences? */
88
89  HB_UTF8_COMPUTE (c, mask, len);
90  if (unlikely (!len || (unsigned int) (end - text) != len)) {
91    *unicode = -1;
92    return end - 1;
93  } else {
94    hb_codepoint_t result;
95    unsigned int i;
96    result = c & mask;
97    for (i = 1; i < len; i++)
98      {
99	result <<= 6;
100	result |= (text[i] & 0x3f);
101      }
102    *unicode = result;
103    return text;
104  }
105}
106
107
/* Number of bytes in the NUL-terminated UTF-8 string text, not counting
 * the terminator.  The count is in code units (bytes), not code points. */
static inline unsigned int
hb_utf_strlen (const uint8_t *text)
{
  const char *bytes = (const char *) text;
  size_t byte_count = strlen (bytes);
  return (unsigned int) byte_count;
}
113
114
115/* UTF-16 */
116
117static inline const uint16_t *
118hb_utf_next (const uint16_t *text,
119	     const uint16_t *end,
120	     hb_codepoint_t *unicode)
121{
122  hb_codepoint_t c = *text++;
123
124  if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff)))
125  {
126    /* high surrogate */
127    hb_codepoint_t l;
128    if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff))))
129    {
130      /* low surrogate */
131      *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
132       text++;
133    } else
134      *unicode = -1;
135  } else
136    *unicode = c;
137
138  return text;
139}
140
141static inline const uint16_t *
142hb_utf_prev (const uint16_t *text,
143	     const uint16_t *start,
144	     hb_codepoint_t *unicode)
145{
146  hb_codepoint_t c = *--text;
147
148  if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
149  {
150    /* low surrogate */
151    hb_codepoint_t h;
152    if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff))))
153    {
154      /* high surrogate */
155      *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
156       text--;
157    } else
158      *unicode = -1;
159  } else
160    *unicode = c;
161
162  return text;
163}
164
165
/* Number of 16-bit code units in text before the first zero unit.
 * The count is in code units, so a surrogate pair counts as two. */
static inline unsigned int
hb_utf_strlen (const uint16_t *text)
{
  unsigned int count = 0;
  for (const uint16_t *p = text; *p != 0; p++)
    count++;
  return count;
}
173
174
175/* UTF-32 */
176
177static inline const uint32_t *
178hb_utf_next (const uint32_t *text,
179	     const uint32_t *end HB_UNUSED,
180	     hb_codepoint_t *unicode)
181{
182  *unicode = *text++;
183  return text;
184}
185
186static inline const uint32_t *
187hb_utf_prev (const uint32_t *text,
188	     const uint32_t *start HB_UNUSED,
189	     hb_codepoint_t *unicode)
190{
191  *unicode = *--text;
192  return text;
193}
194
/* Number of 32-bit code units in text before the first zero unit. */
static inline unsigned int
hb_utf_strlen (const uint32_t *text)
{
  unsigned int count = 0;
  for (const uint32_t *p = text; *p != 0; p++)
    count++;
  return count;
}
202
203
204#endif /* HB_UTF_PRIVATE_HH */
205