wchar_test.cpp revision efaa461bd67cfdfcbc2b4c6b69805bc5b929ae05
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <gtest/gtest.h>
18
19#include <errno.h>
20#include <limits.h>
21#include <locale.h>
22#include <stdint.h>
23#include <wchar.h>
24
25TEST(wchar, sizeof_wchar_t) {
26  EXPECT_EQ(4U, sizeof(wchar_t));
27  EXPECT_EQ(4U, sizeof(wint_t));
28}
29
30TEST(wchar, mbrlen) {
31  char bytes[] = { 'h', 'e', 'l', 'l', 'o', '\0' };
32  EXPECT_EQ(0U, mbrlen(&bytes[0], 0, NULL));
33  EXPECT_EQ(1U, mbrlen(&bytes[0], 1, NULL));
34
35  EXPECT_EQ(1U, mbrlen(&bytes[4], 1, NULL));
36  EXPECT_EQ(0U, mbrlen(&bytes[5], 1, NULL));
37}
38
39TEST(wchar, wctomb_wcrtomb) {
40  // wctomb and wcrtomb behave differently when s == NULL.
41  EXPECT_EQ(0, wctomb(NULL, L'h'));
42  EXPECT_EQ(0, wctomb(NULL, L'\0'));
43  EXPECT_EQ(1U, wcrtomb(NULL, L'\0', NULL));
44  EXPECT_EQ(1U, wcrtomb(NULL, L'h', NULL));
45
46  char bytes[MB_LEN_MAX];
47
48  // wctomb and wcrtomb behave similarly for the null wide character.
49  EXPECT_EQ(1, wctomb(bytes, L'\0'));
50  EXPECT_EQ(1U, wcrtomb(bytes, L'\0', NULL));
51
52  // ...and for regular characters.
53  memset(bytes, 0, sizeof(bytes));
54  EXPECT_EQ(1, wctomb(bytes, L'h'));
55  EXPECT_EQ('h', bytes[0]);
56  memset(bytes, 0, sizeof(bytes));
57  EXPECT_EQ(1U, wcrtomb(bytes, L'h', NULL));
58  EXPECT_EQ('h', bytes[0]);
59
60  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
61  uselocale(LC_GLOBAL_LOCALE);
62
63  // 1-byte UTF-8.
64  memset(bytes, 0, sizeof(bytes));
65  EXPECT_EQ(1U, wcrtomb(bytes, L'h', NULL));
66  EXPECT_EQ('h', bytes[0]);
67  // 2-byte UTF-8.
68  memset(bytes, 0, sizeof(bytes));
69  EXPECT_EQ(2U, wcrtomb(bytes, 0x00a2, NULL));
70  EXPECT_EQ('\xc2', bytes[0]);
71  EXPECT_EQ('\xa2', bytes[1]);
72  // 3-byte UTF-8.
73  memset(bytes, 0, sizeof(bytes));
74  EXPECT_EQ(3U, wcrtomb(bytes, 0x20ac, NULL));
75  EXPECT_EQ('\xe2', bytes[0]);
76  EXPECT_EQ('\x82', bytes[1]);
77  EXPECT_EQ('\xac', bytes[2]);
78  // 4-byte UTF-8.
79  memset(bytes, 0, sizeof(bytes));
80  EXPECT_EQ(4U, wcrtomb(bytes, 0x24b62, NULL));
81  EXPECT_EQ('\xf0', bytes[0]);
82  EXPECT_EQ('\xa4', bytes[1]);
83  EXPECT_EQ('\xad', bytes[2]);
84  EXPECT_EQ('\xa2', bytes[3]);
85  // Invalid code point.
86  EXPECT_EQ(static_cast<size_t>(-1), wcrtomb(bytes, 0xffffffff, NULL));
87  EXPECT_EQ(EILSEQ, errno);
88}
89
90TEST(wchar, wcstombs_wcrtombs) {
91  const wchar_t chars[] = { L'h', L'e', L'l', L'l', L'o', 0 };
92  const wchar_t bad_chars[] = { L'h', L'i', static_cast<wchar_t>(0xffffffff), 0 };
93  const wchar_t* src;
94  char bytes[BUFSIZ];
95
96  // Given a NULL destination, these functions count valid characters.
97  EXPECT_EQ(5U, wcstombs(NULL, chars, 0));
98  EXPECT_EQ(5U, wcstombs(NULL, chars, 4));
99  EXPECT_EQ(5U, wcstombs(NULL, chars, 256));
100  src = chars;
101  EXPECT_EQ(5U, wcsrtombs(NULL, &src, 0, NULL));
102  EXPECT_EQ(&chars[0], src);
103  src = chars;
104  EXPECT_EQ(5U, wcsrtombs(NULL, &src, 4, NULL));
105  EXPECT_EQ(&chars[0], src);
106  src = chars;
107  EXPECT_EQ(5U, wcsrtombs(NULL, &src, 256, NULL));
108  EXPECT_EQ(&chars[0], src);
109
110  // An unrepresentable char just returns an error from wcstombs...
111  errno = 0;
112  EXPECT_EQ(static_cast<size_t>(-1), wcstombs(NULL, bad_chars, 0));
113  EXPECT_EQ(EILSEQ, errno);
114  errno = 0;
115  EXPECT_EQ(static_cast<size_t>(-1), wcstombs(NULL, bad_chars, 256));
116  EXPECT_EQ(EILSEQ, errno);
117
118  // And wcsrtombs doesn't tell us where it got stuck because we didn't ask it
119  // to actually convert anything...
120  errno = 0;
121  src = bad_chars;
122  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(NULL, &src, 0, NULL));
123  EXPECT_EQ(&bad_chars[0], src);
124  EXPECT_EQ(EILSEQ, errno);
125  errno = 0;
126  src = bad_chars;
127  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(NULL, &src, 256, NULL));
128  EXPECT_EQ(&bad_chars[0], src);
129  EXPECT_EQ(EILSEQ, errno);
130
131  // Okay, now let's test actually converting something...
132  memset(bytes, 'x', sizeof(bytes));
133  EXPECT_EQ(0U, wcstombs(bytes, chars, 0));
134  memset(bytes, 'x', sizeof(bytes));
135  EXPECT_EQ(4U, wcstombs(bytes, chars, 4));
136  bytes[5] = 0;
137  EXPECT_STREQ("hellx", bytes);
138  memset(bytes, 'x', sizeof(bytes));
139  EXPECT_EQ(5U, wcstombs(bytes, chars, 256));
140  EXPECT_STREQ("hello", bytes);
141  memset(bytes, 'x', sizeof(bytes));
142  EXPECT_EQ(5U, wcstombs(bytes, chars, 6));
143  EXPECT_STREQ("hello", bytes);
144  errno = 0;
145  memset(bytes, 'x', sizeof(bytes));
146  EXPECT_EQ(static_cast<size_t>(-1), wcstombs(bytes, bad_chars, 256));
147  EXPECT_EQ(EILSEQ, errno);
148  bytes[3] = 0;
149  EXPECT_STREQ("hix", bytes);
150
151  // wcsrtombs is a bit more informative...
152  memset(bytes, 'x', sizeof(bytes));
153  src = chars;
154  EXPECT_EQ(0U, wcsrtombs(bytes, &src, 0, NULL));
155  EXPECT_EQ(&chars[0], src); // No input consumed.
156  EXPECT_EQ(EILSEQ, errno);
157
158  memset(bytes, 'x', sizeof(bytes));
159  src = chars;
160  EXPECT_EQ(4U, wcsrtombs(bytes, &src, 4, NULL));
161  EXPECT_EQ(&chars[4], src); // Some input consumed.
162  EXPECT_EQ(EILSEQ, errno);
163  bytes[5] = 0;
164  EXPECT_STREQ("hellx", bytes);
165
166  memset(bytes, 'x', sizeof(bytes));
167  src = chars;
168  EXPECT_EQ(5U, wcsrtombs(bytes, &src, 256, NULL));
169  EXPECT_EQ(NULL, src); // All input consumed!
170  EXPECT_EQ(EILSEQ, errno);
171  EXPECT_STREQ("hello", bytes);
172
173  memset(bytes, 'x', sizeof(bytes));
174  src = chars;
175  EXPECT_EQ(5U, wcsrtombs(bytes, &src, 6, NULL));
176  EXPECT_EQ(NULL, src); // All input consumed.
177  EXPECT_EQ(EILSEQ, errno);
178  EXPECT_STREQ("hello", bytes);
179
180  memset(bytes, 'x', sizeof(bytes));
181  src = bad_chars;
182  EXPECT_EQ(static_cast<size_t>(-1), wcsrtombs(bytes, &src, 256, NULL));
183  EXPECT_EQ(&bad_chars[2], src);
184  EXPECT_EQ(EILSEQ, errno);
185  bytes[3] = 0;
186  EXPECT_STREQ("hix", bytes);
187}
188
189TEST(wchar, limits) {
190  ASSERT_LT(WCHAR_MIN, WCHAR_MAX);
191}
192
193TEST(wchar, wcsstr_wcswcs) {
194  const wchar_t* haystack = L"matches hello world, not the second hello world";
195  const wchar_t* empty_needle = L"";
196  const wchar_t* good_needle = L"ll";
197  const wchar_t* bad_needle = L"wort";
198
199  ASSERT_EQ(haystack, wcsstr(haystack, empty_needle));
200  ASSERT_EQ(&haystack[10], wcsstr(haystack, good_needle));
201  ASSERT_EQ(NULL, wcsstr(haystack, bad_needle));
202
203  ASSERT_EQ(haystack, wcswcs(haystack, empty_needle));
204  ASSERT_EQ(&haystack[10], wcswcs(haystack, good_needle));
205  ASSERT_EQ(NULL, wcswcs(haystack, bad_needle));
206}
207
208TEST(wchar, mbtowc) {
209  wchar_t out[8];
210
211  out[0] = 'x';
212  ASSERT_EQ(0, mbtowc(out, "hello", 0));
213  ASSERT_EQ('x', out[0]);
214
215  ASSERT_EQ(0, mbtowc(out, "hello", 0));
216  ASSERT_EQ(0, mbtowc(out, "", 0));
217  ASSERT_EQ(1, mbtowc(out, "hello", 1));
218  ASSERT_EQ(L'h', out[0]);
219
220  ASSERT_EQ(0, mbtowc(NULL, "hello", 0));
221  ASSERT_EQ(0, mbtowc(NULL, "", 0));
222  ASSERT_EQ(1, mbtowc(NULL, "hello", 1));
223
224  ASSERT_EQ(0, mbtowc(NULL, NULL, 0));
225}
226
227TEST(wchar, mbrtowc) {
228  wchar_t out[8];
229
230  out[0] = 'x';
231  ASSERT_EQ(0U, mbrtowc(out, "hello", 0, NULL));
232  ASSERT_EQ('x', out[0]);
233
234  ASSERT_EQ(0U, mbrtowc(out, "hello", 0, NULL));
235  ASSERT_EQ(0U, mbrtowc(out, "", 0, NULL));
236  ASSERT_EQ(1U, mbrtowc(out, "hello", 1, NULL));
237  ASSERT_EQ(L'h', out[0]);
238
239  ASSERT_EQ(0U, mbrtowc(NULL, "hello", 0, NULL));
240  ASSERT_EQ(0U, mbrtowc(NULL, "", 0, NULL));
241  ASSERT_EQ(1U, mbrtowc(NULL, "hello", 1, NULL));
242
243  ASSERT_EQ(0U, mbrtowc(NULL, NULL, 0, NULL));
244
245  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
246  uselocale(LC_GLOBAL_LOCALE);
247
248  // 1-byte UTF-8.
249  ASSERT_EQ(1U, mbrtowc(out, "abcdef", 6, NULL));
250  ASSERT_EQ(L'a', out[0]);
251  // 2-byte UTF-8.
252  ASSERT_EQ(2U, mbrtowc(out, "\xc2\xa2" "cdef", 6, NULL));
253  ASSERT_EQ(0x00a2, out[0]);
254  // 3-byte UTF-8.
255  ASSERT_EQ(3U, mbrtowc(out, "\xe2\x82\xac" "def", 6, NULL));
256  ASSERT_EQ(0x20ac, out[0]);
257  // 4-byte UTF-8.
258  ASSERT_EQ(4U, mbrtowc(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL));
259  ASSERT_EQ(0x24b62, out[0]);
260#if __BIONIC__ // glibc allows this.
261  // Illegal 5-byte UTF-8.
262  ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL));
263  ASSERT_EQ(EILSEQ, errno);
264#endif
265  // Illegal over-long sequence.
266  ASSERT_EQ(static_cast<size_t>(-1), mbrtowc(out, "\xf0\x82\x82\xac" "ef", 6, NULL));
267  ASSERT_EQ(EILSEQ, errno);
268}
269
270TEST(wchar, wcstod) {
271  ASSERT_DOUBLE_EQ(1.23, wcstod(L"1.23", NULL));
272}
273
274TEST(wchar, wcstof) {
275  ASSERT_FLOAT_EQ(1.23f, wcstof(L"1.23", NULL));
276}
277
278TEST(wchar, wcstol) {
279  ASSERT_EQ(123L, wcstol(L"123", NULL, 0));
280}
281
282TEST(wchar, wcstoll) {
283  ASSERT_EQ(123LL, wcstol(L"123", NULL, 0));
284}
285
286TEST(wchar, wcstold) {
287  ASSERT_DOUBLE_EQ(1.23L, wcstold(L"1.23", NULL));
288}
289
290TEST(wchar, wcstoul) {
291  ASSERT_EQ(123UL, wcstoul(L"123", NULL, 0));
292}
293
294TEST(wchar, wcstoull) {
295  ASSERT_EQ(123ULL, wcstoul(L"123", NULL, 0));
296}
297
298TEST(wchar, mbsnrtowcs) {
299  wchar_t dst[128];
300  const char* s = "hello, world!";
301  const char* src;
302
303  memset(dst, 0, sizeof(dst));
304  src = s;
305  ASSERT_EQ(0U, mbsnrtowcs(dst, &src, 0, 0, NULL));
306
307  memset(dst, 0, sizeof(dst));
308  src = s;
309  ASSERT_EQ(2U, mbsnrtowcs(dst, &src, 2, 123, NULL)); // glibc chokes on SIZE_MAX here.
310  ASSERT_EQ(L'h', dst[0]);
311  ASSERT_EQ(L'e', dst[1]);
312  ASSERT_EQ(&s[2], src);
313
314  memset(dst, 0, sizeof(dst));
315  src = s;
316  ASSERT_EQ(3U, mbsnrtowcs(dst, &src, SIZE_MAX, 3, NULL));
317  ASSERT_EQ(L'h', dst[0]);
318  ASSERT_EQ(L'e', dst[1]);
319  ASSERT_EQ(L'l', dst[2]);
320  ASSERT_EQ(&s[3], src);
321}
322
323TEST(wchar, wcsftime) {
324  setenv("TZ", "UTC", 1);
325
326  struct tm t;
327  memset(&t, 0, sizeof(tm));
328  t.tm_year = 200;
329  t.tm_mon = 2;
330  t.tm_mday = 10;
331
332  wchar_t buf[64];
333
334  EXPECT_EQ(24U, wcsftime(buf, sizeof(buf), L"%c", &t));
335  EXPECT_STREQ(L"Sun Mar 10 00:00:00 2100", buf);
336}
337