/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
17f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include "utils/utf8_utils.h"
18f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
19f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include <gtest/gtest.h>
20f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
21f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include <vector>
22f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
23f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include "utils/int_array_view.h"
24f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
25f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanaginamespace latinime {
26f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanaginamespace dicttoolkit {
27f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanaginamespace {
28f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
29f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke KuroyanagiTEST(Utf8UtilsTests, TestGetCodePoints) {
30f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    {
31f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        const std::vector<int> codePoints = Utf8Utils::getCodePoints("");
32f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(0u, codePoints.size());
33f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    }
34f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    {
35f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        const std::vector<int> codePoints = Utf8Utils::getCodePoints("test");
36f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(4u, codePoints.size());
37f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ('t', codePoints[0]);
38f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ('e', codePoints[1]);
39f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ('s', codePoints[2]);
40f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ('t', codePoints[3]);
41f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    }
42f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    {
43f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410");
44f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(4u, codePoints.size());
45f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A
46f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ('a', codePoints[1]);
47f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(0x03C2, codePoints[2]); // CYRILLIC CAPITAL LETTER A
48f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(0x0410, codePoints[3]); // GREEK SMALL LETTER FINAL SIGMA
49f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    }
50f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    {
51f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752");
52f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(3u, codePoints.size());
53f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE
54f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ('?', codePoints[1]);
55f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT
56f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    }
57f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
58f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    // Redundant UTF-8 sequences must be rejected.
59f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty());
60f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty());
61f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty());
62f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi}
63f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
64f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke KuroyanagiTEST(Utf8UtilsTests, TestGetUtf8String) {
65f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    {
66f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        const std::vector<int> codePoints = {'t', 'e', 's', 't'};
67f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints)));
68f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    }
69f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    {
70f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        const std::vector<int> codePoints = {
71f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */,
72f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */,
73f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                0x0430 /* CYRILLIC SMALL LETTER A */,
74f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                0x3042 /* HIRAGANA LETTER A */,
75f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                0x1F36A /* COOKIE */,
76f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */
77f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        };
78f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi        EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752",
79f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi                Utf8Utils::getUtf8String(CodePointArrayView(codePoints)));
80f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi    }
81f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi}
82f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi
83f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} // namespace
84f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} // namespace dicttoolkit
85f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} // namespace latinime
86