// utf_test.cc revision a5afcfc73141e5e378d79a326d02c5c2039fb025
1/* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "utf.h" 18 19#include "common_runtime_test.h" 20#include "utf-inl.h" 21 22namespace art { 23 24class UtfTest : public CommonRuntimeTest {}; 25 26TEST_F(UtfTest, GetLeadingUtf16Char) { 27 EXPECT_EQ(0xffff, GetLeadingUtf16Char(0xeeeeffff)); 28} 29 30TEST_F(UtfTest, GetTrailingUtf16Char) { 31 EXPECT_EQ(0xffff, GetTrailingUtf16Char(0xffffeeee)); 32 EXPECT_EQ(0, GetTrailingUtf16Char(0x0000aaaa)); 33} 34 35#define EXPECT_ARRAY_POSITION(expected, end, start) \ 36 EXPECT_EQ(static_cast<uintptr_t>(expected), \ 37 reinterpret_cast<uintptr_t>(end) - reinterpret_cast<uintptr_t>(start)); 38 39// A test string containing one, two, three and four byte UTF-8 sequences. 40static const uint8_t kAllSequences[] = { 41 0x24, 42 0xc2, 0xa2, 43 0xe2, 0x82, 0xac, 44 0xf0, 0x9f, 0x8f, 0xa0, 45 0x00 46}; 47 48// A test string that contains a UTF-8 encoding of a surrogate pair 49// (code point = U+10400) 50static const uint8_t kSurrogateEncoding[] = { 51 0xed, 0xa0, 0x81, 52 0xed, 0xb0, 0x80, 53 0x00 54}; 55 56TEST_F(UtfTest, GetUtf16FromUtf8) { 57 const char* const start = reinterpret_cast<const char*>(kAllSequences); 58 const char* ptr = start; 59 uint32_t pair = 0; 60 61 // Single byte sequence. 
62 pair = GetUtf16FromUtf8(&ptr); 63 EXPECT_EQ(0x24, GetLeadingUtf16Char(pair)); 64 EXPECT_EQ(0, GetTrailingUtf16Char(pair)); 65 EXPECT_ARRAY_POSITION(1, ptr, start); 66 67 // Two byte sequence 68 pair = GetUtf16FromUtf8(&ptr); 69 EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair)); 70 EXPECT_EQ(0, GetTrailingUtf16Char(pair)); 71 EXPECT_ARRAY_POSITION(3, ptr, start); 72 73 // Three byte sequence 74 pair = GetUtf16FromUtf8(&ptr); 75 EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair)); 76 EXPECT_EQ(0, GetTrailingUtf16Char(pair)); 77 EXPECT_ARRAY_POSITION(6, ptr, start); 78 79 // Four byte sequence 80 pair = GetUtf16FromUtf8(&ptr); 81 EXPECT_EQ(0xd83c, GetLeadingUtf16Char(pair)); 82 EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair)); 83 EXPECT_ARRAY_POSITION(10, ptr, start); 84 85 // Null terminator 86 pair = GetUtf16FromUtf8(&ptr); 87 EXPECT_EQ(0, GetLeadingUtf16Char(pair)); 88 EXPECT_EQ(0, GetTrailingUtf16Char(pair)); 89 EXPECT_ARRAY_POSITION(11, ptr, start); 90} 91 92TEST_F(UtfTest, GetUtf16FromUtf8_SurrogatesPassThrough) { 93 const char* const start = reinterpret_cast<const char *>(kSurrogateEncoding); 94 const char* ptr = start; 95 uint32_t pair = 0; 96 97 pair = GetUtf16FromUtf8(&ptr); 98 EXPECT_EQ(0xd801, GetLeadingUtf16Char(pair)); 99 EXPECT_EQ(0, GetTrailingUtf16Char(pair)); 100 EXPECT_ARRAY_POSITION(3, ptr, start); 101 102 pair = GetUtf16FromUtf8(&ptr); 103 EXPECT_EQ(0xdc00, GetLeadingUtf16Char(pair)); 104 EXPECT_EQ(0, GetTrailingUtf16Char(pair)); 105 EXPECT_ARRAY_POSITION(6, ptr, start); 106} 107 108TEST_F(UtfTest, CountModifiedUtf8Chars) { 109 EXPECT_EQ(5u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kAllSequences))); 110 EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding))); 111} 112 113} // namespace art 114