1f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi/* 2f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 3f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * 4f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 6f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * You may obtain a copy of the License at 7f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * 8f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * 10f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * See the License for the specific language governing permissions and 14f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi * limitations under the License. 15f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi */ 16f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 17f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include "utils/utf8_utils.h" 18f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 19f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include <gtest/gtest.h> 20f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 21f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include <vector> 22f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 23f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi#include "utils/int_array_view.h" 24f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 25f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanaginamespace latinime { 26f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanaginamespace dicttoolkit { 27f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanaginamespace { 28f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 29f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke KuroyanagiTEST(Utf8UtilsTests, TestGetCodePoints) { 30f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi { 31f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi const std::vector<int> codePoints = Utf8Utils::getCodePoints(""); 32f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(0u, codePoints.size()); 33f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi } 34f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi { 35f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi const std::vector<int> codePoints = Utf8Utils::getCodePoints("test"); 36f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(4u, codePoints.size()); 37f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ('t', codePoints[0]); 38f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ('e', codePoints[1]); 39f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ('s', codePoints[2]); 40f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ('t', codePoints[3]); 41f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi } 42f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi { 43f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410"); 44f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(4u, codePoints.size()); 45f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A 46f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ('a', codePoints[1]); 47f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(0x03C2, codePoints[2]); // CYRILLIC CAPITAL LETTER A 48f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(0x0410, codePoints[3]); // GREEK SMALL LETTER FINAL SIGMA 49f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi } 50f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi { 51f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752"); 52f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(3u, codePoints.size()); 53f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE 54f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ('?', codePoints[1]); 55f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT 56f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi } 57f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 58f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi // Redundant UTF-8 sequences must be rejected. 59f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty()); 60f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty()); 61f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty()); 62f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} 63f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 64f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke KuroyanagiTEST(Utf8UtilsTests, TestGetUtf8String) { 65f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi { 66f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi const std::vector<int> codePoints = {'t', 'e', 's', 't'}; 67f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); 68f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi } 69f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi { 70f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi const std::vector<int> codePoints = { 71f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */, 72f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */, 73f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 0x0430 /* CYRILLIC SMALL LETTER A */, 74f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 0x3042 /* HIRAGANA LETTER A */, 75f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 0x1F36A /* COOKIE */, 76f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */ 77f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi }; 78f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752", 79f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); 80f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi } 81f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} 82f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi 83f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} // namespace 84f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} // namespace dicttoolkit 85f0c303dd02a5df8ad544b3971e7738cb34a1d6beKeisuke Kuroyanagi} // namespace latinime 86