1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include <sys/cdefs.h> 19#if defined(__BIONIC__) 20#define HAVE_UCHAR 1 21#elif defined(__GLIBC__) 22#define HAVE_UCHAR __GLIBC_PREREQ(2, 16) 23#endif 24 25#include <gtest/gtest.h> 26 27#include <errno.h> 28#include <limits.h> 29#include <locale.h> 30#include <stdint.h> 31 32#if HAVE_UCHAR 33#include <uchar.h> 34#endif 35 36TEST(uchar, sizeof_uchar_t) { 37#if HAVE_UCHAR 38 EXPECT_EQ(2U, sizeof(char16_t)); 39 EXPECT_EQ(4U, sizeof(char32_t)); 40#else 41 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 42#endif 43} 44 45TEST(uchar, start_state) { 46#if HAVE_UCHAR 47 char out[MB_LEN_MAX]; 48 mbstate_t ps; 49 50 // Any non-initial state is invalid when calling c32rtomb. 51 memset(&ps, 0, sizeof(ps)); 52 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps)); 53 EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(out, 0x00a2, &ps)); 54 EXPECT_EQ(EILSEQ, errno); 55 56 // If the first argument to c32rtomb is NULL or the second is L'\0' the shift 57 // state should be reset. 58 memset(&ps, 0, sizeof(ps)); 59 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps)); 60 EXPECT_EQ(1U, c32rtomb(NULL, 0x00a2, &ps)); 61 EXPECT_TRUE(mbsinit(&ps)); 62 63 memset(&ps, 0, sizeof(ps)); 64 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xf0\xa4", 1, &ps)); 65 EXPECT_EQ(1U, c32rtomb(out, L'\0', &ps)); 66 EXPECT_TRUE(mbsinit(&ps)); 67#else 68 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 69#endif 70} 71 72TEST(uchar, c16rtomb_null_out) { 73#if HAVE_UCHAR 74 EXPECT_EQ(1U, c16rtomb(NULL, L'\0', NULL)); 75 EXPECT_EQ(1U, c16rtomb(NULL, L'h', NULL)); 76#else 77 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 78#endif 79} 80 81TEST(uchar, c16rtomb_null_char) { 82#if HAVE_UCHAR 83 char bytes[MB_LEN_MAX]; 84 EXPECT_EQ(1U, c16rtomb(bytes, L'\0', NULL)); 85#else 86 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 87#endif 88} 89 90TEST(uchar, c16rtomb) { 91#if HAVE_UCHAR 92 char bytes[MB_LEN_MAX]; 93 94 memset(bytes, 0, sizeof(bytes)); 95 EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL)); 96 EXPECT_EQ('h', bytes[0]); 97 98 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 99 uselocale(LC_GLOBAL_LOCALE); 100 101 // 1-byte UTF-8. 102 memset(bytes, 0, sizeof(bytes)); 103 EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL)); 104 EXPECT_EQ('h', bytes[0]); 105 // 2-byte UTF-8. 106 memset(bytes, 0, sizeof(bytes)); 107 EXPECT_EQ(2U, c16rtomb(bytes, 0x00a2, NULL)); 108 EXPECT_EQ('\xc2', bytes[0]); 109 EXPECT_EQ('\xa2', bytes[1]); 110 // 3-byte UTF-8. 111 memset(bytes, 0, sizeof(bytes)); 112 EXPECT_EQ(3U, c16rtomb(bytes, 0x20ac, NULL)); 113 EXPECT_EQ('\xe2', bytes[0]); 114 EXPECT_EQ('\x82', bytes[1]); 115 EXPECT_EQ('\xac', bytes[2]); 116#else 117 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 118#endif 119} 120 121TEST(uchar, c16rtomb_surrogate) { 122#if HAVE_UCHAR 123 char bytes[MB_LEN_MAX]; 124 125 memset(bytes, 0, sizeof(bytes)); 126 EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL)); 127 EXPECT_EQ(4U, c16rtomb(bytes, 0xdfcd, NULL)); 128 EXPECT_EQ('\xf4', bytes[0]); 129 EXPECT_EQ('\x8a', bytes[1]); 130 EXPECT_EQ('\xaf', bytes[2]); 131 EXPECT_EQ('\x8d', bytes[3]); 132#else 133 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 134#endif 135} 136 137TEST(uchar, c16rtomb_invalid) { 138#if HAVE_UCHAR 139 char bytes[MB_LEN_MAX]; 140 141 memset(bytes, 0, sizeof(bytes)); 142 EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdfcd, NULL)); 143 144 EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL)); 145 EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdbea, NULL)); 146#else 147 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 148#endif 149} 150 151TEST(uchar, mbrtoc16_null) { 152#if HAVE_UCHAR 153 ASSERT_EQ(0U, mbrtoc16(NULL, NULL, 0, NULL)); 154#else 155 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 156#endif 157} 158 159TEST(uchar, mbrtoc16_zero_len) { 160#if HAVE_UCHAR 161 char16_t out; 162 163 out = L'x'; 164 ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL)); 165 ASSERT_EQ(L'x', out); 166 167 ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL)); 168 ASSERT_EQ(0U, mbrtoc16(&out, "", 0, NULL)); 169 ASSERT_EQ(1U, mbrtoc16(&out, "hello", 1, NULL)); 170 ASSERT_EQ(L'h', out); 171#else 172 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 173#endif 174} 175 176TEST(uchar, mbrtoc16) { 177#if HAVE_UCHAR 178 char16_t out; 179 180 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 181 uselocale(LC_GLOBAL_LOCALE); 182 183 // 1-byte UTF-8. 184 ASSERT_EQ(1U, mbrtoc16(&out, "abcdef", 6, NULL)); 185 ASSERT_EQ(L'a', out); 186 // 2-byte UTF-8. 187 ASSERT_EQ(2U, mbrtoc16(&out, "\xc2\xa2" "cdef", 6, NULL)); 188 ASSERT_EQ(static_cast<char16_t>(0x00a2), out); 189 // 3-byte UTF-8. 190 ASSERT_EQ(3U, mbrtoc16(&out, "\xe2\x82\xac" "def", 6, NULL)); 191 ASSERT_EQ(static_cast<char16_t>(0x20ac), out); 192#else 193 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 194#endif 195} 196 197TEST(uchar, mbrtoc16_surrogate) { 198#if HAVE_UCHAR 199 char16_t out; 200 201 ASSERT_EQ(static_cast<size_t>(-3), 202 mbrtoc16(&out, "\xf4\x8a\xaf\x8d", 6, NULL)); 203 ASSERT_EQ(static_cast<char16_t>(0xdbea), out); 204 ASSERT_EQ(4U, mbrtoc16(&out, "\xf4\x8a\xaf\x8d" "ef", 6, NULL)); 205 ASSERT_EQ(static_cast<char16_t>(0xdfcd), out); 206#else 207 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 208#endif 209} 210 211TEST(uchar, mbrtoc16_reserved_range) { 212#if HAVE_UCHAR 213 char16_t out; 214 ASSERT_EQ(static_cast<size_t>(-1), 215 mbrtoc16(&out, "\xf0\x80\xbf\xbf", 6, NULL)); 216#else 217 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 218#endif 219} 220 221TEST(uchar, mbrtoc16_beyond_range) { 222#if HAVE_UCHAR 223 char16_t out; 224 ASSERT_EQ(static_cast<size_t>(-1), 225 mbrtoc16(&out, "\xf5\x80\x80\x80", 6, NULL)); 226#else 227 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 228#endif 229} 230 231#if HAVE_UCHAR 232void test_mbrtoc16_incomplete(mbstate_t* ps) { 233 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 234 uselocale(LC_GLOBAL_LOCALE); 235 236 char16_t out; 237 // 2-byte UTF-8. 238 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps)); 239 ASSERT_EQ(1U, mbrtoc16(&out, "\xa2" "cdef", 5, ps)); 240 ASSERT_EQ(static_cast<char16_t>(0x00a2), out); 241 ASSERT_TRUE(mbsinit(ps)); 242 // 3-byte UTF-8. 243 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xe2", 1, ps)); 244 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x82", 1, ps)); 245 ASSERT_EQ(1U, mbrtoc16(&out, "\xac" "def", 4, ps)); 246 ASSERT_EQ(static_cast<char16_t>(0x20ac), out); 247 ASSERT_TRUE(mbsinit(ps)); 248 // 4-byte UTF-8. 249 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xf4", 1, ps)); 250 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x8a\xaf", 2, ps)); 251 ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, "\x8d" "ef", 3, ps)); 252 ASSERT_EQ(static_cast<char16_t>(0xdbea), out); 253 ASSERT_EQ(1U, mbrtoc16(&out, "\x80" "ef", 3, ps)); 254 ASSERT_EQ(static_cast<char16_t>(0xdfcd), out); 255 ASSERT_TRUE(mbsinit(ps)); 256 257 // Invalid 2-byte 258 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps)); 259 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\x20" "cdef", 5, ps)); 260 ASSERT_EQ(EILSEQ, errno); 261} 262#endif 263 264TEST(uchar, mbrtoc16_incomplete) { 265#if HAVE_UCHAR 266 mbstate_t ps; 267 memset(&ps, 0, sizeof(ps)); 268 269 test_mbrtoc16_incomplete(&ps); 270 test_mbrtoc16_incomplete(NULL); 271#else 272 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 273#endif 274} 275 276TEST(uchar, c32rtomb) { 277#if HAVE_UCHAR 278 EXPECT_EQ(1U, c32rtomb(NULL, L'\0', NULL)); 279 EXPECT_EQ(1U, c32rtomb(NULL, L'h', NULL)); 280 281 char bytes[MB_LEN_MAX]; 282 283 EXPECT_EQ(1U, c32rtomb(bytes, L'\0', NULL)); 284 285 memset(bytes, 0, sizeof(bytes)); 286 EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL)); 287 EXPECT_EQ('h', bytes[0]); 288 289 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 290 uselocale(LC_GLOBAL_LOCALE); 291 292 // 1-byte UTF-8. 293 memset(bytes, 0, sizeof(bytes)); 294 EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL)); 295 EXPECT_EQ('h', bytes[0]); 296 // 2-byte UTF-8. 297 memset(bytes, 0, sizeof(bytes)); 298 EXPECT_EQ(2U, c32rtomb(bytes, 0x00a2, NULL)); 299 EXPECT_EQ('\xc2', bytes[0]); 300 EXPECT_EQ('\xa2', bytes[1]); 301 // 3-byte UTF-8. 302 memset(bytes, 0, sizeof(bytes)); 303 EXPECT_EQ(3U, c32rtomb(bytes, 0x20ac, NULL)); 304 EXPECT_EQ('\xe2', bytes[0]); 305 EXPECT_EQ('\x82', bytes[1]); 306 EXPECT_EQ('\xac', bytes[2]); 307 // 4-byte UTF-8. 308 memset(bytes, 0, sizeof(bytes)); 309 EXPECT_EQ(4U, c32rtomb(bytes, 0x24b62, NULL)); 310 EXPECT_EQ('\xf0', bytes[0]); 311 EXPECT_EQ('\xa4', bytes[1]); 312 EXPECT_EQ('\xad', bytes[2]); 313 EXPECT_EQ('\xa2', bytes[3]); 314 // Invalid code point. 315 EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(bytes, 0xffffffff, NULL)); 316 EXPECT_EQ(EILSEQ, errno); 317#else 318 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 319#endif 320} 321 322TEST(uchar, mbrtoc32) { 323#if HAVE_UCHAR 324 char32_t out[8]; 325 326 out[0] = L'x'; 327 ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL)); 328 ASSERT_EQ(static_cast<char32_t>(L'x'), out[0]); 329 330 ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL)); 331 ASSERT_EQ(0U, mbrtoc32(out, "", 0, NULL)); 332 ASSERT_EQ(1U, mbrtoc32(out, "hello", 1, NULL)); 333 ASSERT_EQ(static_cast<char32_t>(L'h'), out[0]); 334 335 ASSERT_EQ(0U, mbrtoc32(NULL, "hello", 0, NULL)); 336 ASSERT_EQ(0U, mbrtoc32(NULL, "", 0, NULL)); 337 ASSERT_EQ(1U, mbrtoc32(NULL, "hello", 1, NULL)); 338 339 ASSERT_EQ(0U, mbrtoc32(NULL, NULL, 0, NULL)); 340 341 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 342 uselocale(LC_GLOBAL_LOCALE); 343 344 // 1-byte UTF-8. 345 ASSERT_EQ(1U, mbrtoc32(out, "abcdef", 6, NULL)); 346 ASSERT_EQ(static_cast<char32_t>(L'a'), out[0]); 347 // 2-byte UTF-8. 348 ASSERT_EQ(2U, mbrtoc32(out, "\xc2\xa2" "cdef", 6, NULL)); 349 ASSERT_EQ(static_cast<char32_t>(0x00a2), out[0]); 350 // 3-byte UTF-8. 351 ASSERT_EQ(3U, mbrtoc32(out, "\xe2\x82\xac" "def", 6, NULL)); 352 ASSERT_EQ(static_cast<char32_t>(0x20ac), out[0]); 353 // 4-byte UTF-8. 354 ASSERT_EQ(4U, mbrtoc32(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL)); 355 ASSERT_EQ(static_cast<char32_t>(0x24b62), out[0]); 356#if defined(__BIONIC__) // glibc allows this. 357 // Illegal 5-byte UTF-8. 358 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL)); 359 ASSERT_EQ(EILSEQ, errno); 360#endif 361 // Illegal over-long sequence. 362 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf0\x82\x82\xac" "ef", 6, NULL)); 363 ASSERT_EQ(EILSEQ, errno); 364#else 365 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 366#endif 367} 368 369#if HAVE_UCHAR 370void test_mbrtoc32_incomplete(mbstate_t* ps) { 371 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 372 uselocale(LC_GLOBAL_LOCALE); 373 374 char32_t out; 375 // 2-byte UTF-8. 376 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps)); 377 ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "cdef", 5, ps)); 378 ASSERT_EQ(static_cast<char32_t>(0x00a2), out); 379 ASSERT_TRUE(mbsinit(ps)); 380 // 3-byte UTF-8. 381 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xe2", 1, ps)); 382 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\x82", 1, ps)); 383 ASSERT_EQ(1U, mbrtoc32(&out, "\xac" "def", 4, ps)); 384 ASSERT_EQ(static_cast<char32_t>(0x20ac), out); 385 ASSERT_TRUE(mbsinit(ps)); 386 // 4-byte UTF-8. 387 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xf0", 1, ps)); 388 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xa4\xad", 2, ps)); 389 ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "ef", 3, ps)); 390 ASSERT_EQ(static_cast<char32_t>(0x24b62), out); 391 ASSERT_TRUE(mbsinit(ps)); 392 393 // Invalid 2-byte 394 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps)); 395 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(&out, "\x20" "cdef", 5, ps)); 396 ASSERT_EQ(EILSEQ, errno); 397} 398#endif 399 400TEST(uchar, mbrtoc32_incomplete) { 401#if HAVE_UCHAR 402 mbstate_t ps; 403 memset(&ps, 0, sizeof(ps)); 404 405 test_mbrtoc32_incomplete(&ps); 406 test_mbrtoc32_incomplete(NULL); 407#else 408 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 409#endif 410} 411 412