1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Unicode_test"
18#include <utils/Log.h>
19#include <utils/Unicode.h>
20
21#include <gtest/gtest.h>
22
23namespace android {
24
25class UnicodeTest : public testing::Test {
26protected:
27    virtual void SetUp() {
28    }
29
30    virtual void TearDown() {
31    }
32
33    char16_t const * const kSearchString = u"I am a leaf on the wind.";
34};
35
36TEST_F(UnicodeTest, UTF8toUTF16ZeroLength) {
37    ssize_t measured;
38
39    const uint8_t str[] = { };
40
41    measured = utf8_to_utf16_length(str, 0);
42    EXPECT_EQ(0, measured)
43            << "Zero length input should return zero length output.";
44}
45
46TEST_F(UnicodeTest, UTF8toUTF16ASCIILength) {
47    ssize_t measured;
48
49    // U+0030 or ASCII '0'
50    const uint8_t str[] = { 0x30 };
51
52    measured = utf8_to_utf16_length(str, sizeof(str));
53    EXPECT_EQ(1, measured)
54            << "ASCII glyphs should have a length of 1 char16_t";
55}
56
57TEST_F(UnicodeTest, UTF8toUTF16Plane1Length) {
58    ssize_t measured;
59
60    // U+2323 SMILE
61    const uint8_t str[] = { 0xE2, 0x8C, 0xA3 };
62
63    measured = utf8_to_utf16_length(str, sizeof(str));
64    EXPECT_EQ(1, measured)
65            << "Plane 1 glyphs should have a length of 1 char16_t";
66}
67
68TEST_F(UnicodeTest, UTF8toUTF16SurrogateLength) {
69    ssize_t measured;
70
71    // U+10000
72    const uint8_t str[] = { 0xF0, 0x90, 0x80, 0x80 };
73
74    measured = utf8_to_utf16_length(str, sizeof(str));
75    EXPECT_EQ(2, measured)
76            << "Surrogate pairs should have a length of 2 char16_t";
77}
78
79TEST_F(UnicodeTest, UTF8toUTF16TruncatedUTF8) {
80    ssize_t measured;
81
82    // Truncated U+2323 SMILE
83    // U+2323 SMILE
84    const uint8_t str[] = { 0xE2, 0x8C };
85
86    measured = utf8_to_utf16_length(str, sizeof(str));
87    EXPECT_EQ(-1, measured)
88            << "Truncated UTF-8 should return -1 to indicate invalid";
89}
90
91TEST_F(UnicodeTest, UTF8toUTF16Normal) {
92    const uint8_t str[] = {
93        0x30, // U+0030, 1 UTF-16 character
94        0xC4, 0x80, // U+0100, 1 UTF-16 character
95        0xE2, 0x8C, 0xA3, // U+2323, 1 UTF-16 character
96        0xF0, 0x90, 0x80, 0x80, // U+10000, 2 UTF-16 character
97    };
98
99    char16_t output[1 + 1 + 1 + 2 + 1]; // Room for NULL
100
101    utf8_to_utf16(str, sizeof(str), output, sizeof(output) / sizeof(output[0]));
102
103    EXPECT_EQ(0x0030, output[0])
104            << "should be U+0030";
105    EXPECT_EQ(0x0100, output[1])
106            << "should be U+0100";
107    EXPECT_EQ(0x2323, output[2])
108            << "should be U+2323";
109    EXPECT_EQ(0xD800, output[3])
110            << "should be first half of surrogate U+10000";
111    EXPECT_EQ(0xDC00, output[4])
112            << "should be second half of surrogate U+10000";
113    EXPECT_EQ(NULL, output[5])
114            << "should be NULL terminated";
115}
116
117TEST_F(UnicodeTest, strstr16EmptyTarget) {
118    EXPECT_EQ(strstr16(kSearchString, u""), kSearchString)
119            << "should return the original pointer";
120}
121
122TEST_F(UnicodeTest, strstr16SameString) {
123    const char16_t* result = strstr16(kSearchString, kSearchString);
124    EXPECT_EQ(kSearchString, result)
125            << "should return the original pointer";
126}
127
128TEST_F(UnicodeTest, strstr16TargetStartOfString) {
129    const char16_t* result = strstr16(kSearchString, u"I am");
130    EXPECT_EQ(kSearchString, result)
131            << "should return the original pointer";
132}
133
134
135TEST_F(UnicodeTest, strstr16TargetEndOfString) {
136    const char16_t* result = strstr16(kSearchString, u"wind.");
137    EXPECT_EQ(kSearchString+19, result);
138}
139
140TEST_F(UnicodeTest, strstr16TargetWithinString) {
141    const char16_t* result = strstr16(kSearchString, u"leaf");
142    EXPECT_EQ(kSearchString+7, result);
143}
144
145TEST_F(UnicodeTest, strstr16TargetNotPresent) {
146    const char16_t* result = strstr16(kSearchString, u"soar");
147    EXPECT_EQ(nullptr, result);
148}
149
150// http://b/29267949
151// Test that overreading in utf8_to_utf16_length is detected
152TEST_F(UnicodeTest, InvalidUtf8OverreadDetected) {
153    // An utf8 char starting with \xc4 is two bytes long.
154    // Add extra zeros so no extra memory is read in case the code doesn't
155    // work as expected.
156    static char utf8[] = "\xc4\x00\x00\x00";
157    ASSERT_DEATH(utf8_to_utf16_length((uint8_t *) utf8, strlen(utf8),
158            true /* overreadIsFatal */), "" /* regex for ASSERT_DEATH */);
159}
160
161}
162