1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "utils/utf8_utils.h" 18 19#include <gtest/gtest.h> 20 21#include <vector> 22 23#include "utils/int_array_view.h" 24 25namespace latinime { 26namespace dicttoolkit { 27namespace { 28 29TEST(Utf8UtilsTests, TestGetCodePoints) { 30 { 31 const std::vector<int> codePoints = Utf8Utils::getCodePoints(""); 32 EXPECT_EQ(0u, codePoints.size()); 33 } 34 { 35 const std::vector<int> codePoints = Utf8Utils::getCodePoints("test"); 36 EXPECT_EQ(4u, codePoints.size()); 37 EXPECT_EQ('t', codePoints[0]); 38 EXPECT_EQ('e', codePoints[1]); 39 EXPECT_EQ('s', codePoints[2]); 40 EXPECT_EQ('t', codePoints[3]); 41 } 42 { 43 const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\u3042a\u03C2\u0410"); 44 EXPECT_EQ(4u, codePoints.size()); 45 EXPECT_EQ(0x3042, codePoints[0]); // HIRAGANA LETTER A 46 EXPECT_EQ('a', codePoints[1]); 47 EXPECT_EQ(0x03C2, codePoints[2]); // CYRILLIC CAPITAL LETTER A 48 EXPECT_EQ(0x0410, codePoints[3]); // GREEK SMALL LETTER FINAL SIGMA 49 } 50 { 51 const std::vector<int> codePoints = Utf8Utils::getCodePoints(u8"\U0001F36A?\U0001F752"); 52 EXPECT_EQ(3u, codePoints.size()); 53 EXPECT_EQ(0x1F36A, codePoints[0]); // COOKIE 54 EXPECT_EQ('?', codePoints[1]); 55 EXPECT_EQ(0x1F752, codePoints[2]); // ALCHEMICAL SYMBOL FOR STARRED TRIDENT 56 } 57 58 // Redundant UTF-8 sequences must be rejected. 59 EXPECT_TRUE(Utf8Utils::getCodePoints("\xC0\xAF").empty()); 60 EXPECT_TRUE(Utf8Utils::getCodePoints("\xE0\x80\xAF").empty()); 61 EXPECT_TRUE(Utf8Utils::getCodePoints("\xF0\x80\x80\xAF").empty()); 62} 63 64TEST(Utf8UtilsTests, TestGetUtf8String) { 65 { 66 const std::vector<int> codePoints = {'t', 'e', 's', 't'}; 67 EXPECT_EQ("test", Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); 68 } 69 { 70 const std::vector<int> codePoints = { 71 0x00E0 /* LATIN SMALL LETTER A WITH GRAVE */, 72 0x03C2 /* GREEK SMALL LETTER FINAL SIGMA */, 73 0x0430 /* CYRILLIC SMALL LETTER A */, 74 0x3042 /* HIRAGANA LETTER A */, 75 0x1F36A /* COOKIE */, 76 0x1F752 /* ALCHEMICAL SYMBOL FOR STARRED TRIDENT */ 77 }; 78 EXPECT_EQ(u8"\u00E0\u03C2\u0430\u3042\U0001F36A\U0001F752", 79 Utf8Utils::getUtf8String(CodePointArrayView(codePoints))); 80 } 81} 82 83} // namespace 84} // namespace dicttoolkit 85} // namespace latinime 86