PhoneticStringUtilsTest.cpp revision 0b161e0ce405b617a85d0f1b717bd3d7df056ced
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "PhoneticStringUtils.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
23#include <utils/String8.h>
24
25using namespace android;
26
// Minimal in-file test harness: runs each test method in turn and keeps a
// tally of how many were executed and how many passed.
class TestExecutor {
 public:
  TestExecutor()
      : m_total_count(0),
        m_success_count(0),
        m_success(true) {
  }

  // Runs every test, prints a summary, and returns true only if all passed.
  bool DoAllTests();

 private:
  // Runs one test method and folds its outcome into the counters.
  void DoOneTest(void (TestExecutor::*test)());

  // Individual test cases.
  // Note: a newly added test must also be invoked from DoAllTests().
  void testUtf32At();
  void testGetUtf8FromUtf32();
  void testGetNormalizedString();
  void testLongString();

  int m_total_count;    // Number of tests executed so far.
  int m_success_count;  // Number of executed tests that passed.

  // Outcome of the test currently running. DoOneTest() resets it to true
  // before each test; the check macros clear it when a check fails.
  bool m_success;
};
46
// Fatal equality check: when |input| differs from |expected|, prints both
// values in hex, marks the current test as failed and returns from the
// enclosing function.
//
// Must be expanded inside a function returning void in which an assignable
// `m_success` is in scope (in this file: the TestExecutor test methods).
// Each argument is evaluated exactly once, so expressions with side effects
// are safe. The values are converted to unsigned int for printing, which is
// what the "%X" conversion expects.
#define ASSERT_EQ_VALUE(input, expected)                                \
  do {                                                                  \
    const auto assert_eq_actual_ = (input);                             \
    const auto assert_eq_wanted_ = (expected);                          \
    if (assert_eq_wanted_ != assert_eq_actual_) {                       \
      printf("0x%X(result) != 0x%X(expected)\n",                        \
             static_cast<unsigned int>(assert_eq_actual_),              \
             static_cast<unsigned int>(assert_eq_wanted_));             \
      m_success = false;                                                \
      return;                                                           \
    }                                                                   \
  } while (0)
55
// Non-fatal equality check: when |input| differs from |expected|, prints both
// values in hex and marks the current test as failed, then lets the test
// continue.
//
// Must be expanded where an assignable `m_success` is in scope (in this file:
// the TestExecutor test methods). Each argument is evaluated exactly once, so
// expressions with side effects are safe. The values are converted to
// unsigned int for printing, which is what the "%X" conversion expects.
#define EXPECT_EQ_VALUE(input, expected)                                \
  do {                                                                  \
    const auto expect_eq_actual_ = (input);                             \
    const auto expect_eq_wanted_ = (expected);                          \
    if (expect_eq_wanted_ != expect_eq_actual_) {                       \
      printf("0x%X(result) != 0x%X(expected)\n",                        \
             static_cast<unsigned int>(expect_eq_actual_),              \
             static_cast<unsigned int>(expect_eq_wanted_));             \
      m_success = false;                                                \
    }                                                                   \
  } while (0)
63
64
65bool TestExecutor::DoAllTests() {
66  DoOneTest(&TestExecutor::testUtf32At);
67  DoOneTest(&TestExecutor::testGetUtf8FromUtf32);
68  DoOneTest(&TestExecutor::testGetNormalizedString);
69  DoOneTest(&TestExecutor::testLongString);
70
71  printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
72         m_total_count, m_success_count, m_total_count - m_success_count);
73
74  bool success = m_total_count == m_success_count;
75  printf("\n%s\n", success ? "Success" : "Failure");
76
77  return success;
78}
79
80void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
81  m_success = true;
82
83  (this->*test)();
84
85  ++m_total_count;
86  m_success_count += m_success ? 1 : 0;
87}
88
// Decodes the code point of |src| that starts at byte offset |index| with
// utf32_from_utf8_at() and checks both outputs:
//   - the `next` value written by the call must equal |expected_next|;
//   - the returned code point must equal |expected_value|.
//
// A negative return value, an unexpected `next`, or an unexpected code point
// each mark the current test as failed (m_success = false) and print a
// diagnostic. The test keeps running in every case.
//
// Must be expanded where `m_success` is in scope. |src| is evaluated more
// than once, so pass a string literal or a plain variable.
#define TEST_GET_UTF32AT(src, index, expected_next, expected_value)     \
  do {                                                                  \
    size_t next;                                                        \
    const int32_t ret = utf32_from_utf8_at(src, strlen(src), index, &next); \
    if (ret < 0) {                                                      \
      printf("getUtf32At() returned negative value (src: %s, index: %d)\n", \
             (src), static_cast<int>(index));                           \
      m_success = false;                                                \
    } else if (next != static_cast<size_t>(expected_next)) {            \
      printf("next is unexpected value (src: %s, actual: %lu, expected: %lu)\n", \
             (src), static_cast<unsigned long>(next),                   \
             static_cast<unsigned long>(expected_next));                \
      /* A wrong `next` is a test failure, not just a log line. */      \
      m_success = false;                                                \
    } else {                                                            \
      EXPECT_EQ_VALUE(ret, (expected_value));                           \
    }                                                                   \
  } while (0)
104
105void TestExecutor::testUtf32At() {
106  printf("testUtf32At()\n");
107
108  TEST_GET_UTF32AT("a", 0, 1, 97);
109  // Japanese hiragana "a"
110  TEST_GET_UTF32AT("\xE3\x81\x82", 0, 3, 0x3042);
111  // Japanese fullwidth katakana "a" with ascii a
112  TEST_GET_UTF32AT("a\xE3\x82\xA2", 1, 4, 0x30A2);
113
114  // 2 PUA
115  TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 0, 4, 0xFE000);
116  TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008);
117}
118
119
// Converts the single code point |codepoint| to UTF-8 through
// string8.setTo() and compares the result with the C string |expected|.
//
// On a conversion error or a mismatch the current test is marked as failed
// (m_success = false); on a mismatch both byte sequences are dumped in hex.
// Bytes are printed as unsigned values so that bytes >= 0x80 appear as two
// hex digits regardless of whether plain `char` is signed on the platform.
//
// Must be expanded where `string8` (the String8 scratch object) and
// `m_success` are in scope. Each argument is evaluated exactly once.
#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected)                   \
  do {                                                                  \
    char32_t codepoints[1] = {codepoint};                               \
    const char* const wanted_utf8_ = (expected);                        \
    const status_t ret = string8.setTo(codepoints, 1);                  \
    if (ret != NO_ERROR) {                                              \
      printf("GetUtf8FromCodePoint() returned false at 0x%04X\n",       \
             static_cast<unsigned int>(codepoints[0]));                 \
      m_success = false;                                                \
    } else {                                                            \
      const char* const actual_utf8_ = string8.string();                \
      if (strcmp(actual_utf8_, wanted_utf8_) != 0) {                    \
        printf("Failed at codepoint 0x%04X\n",                          \
               static_cast<unsigned int>(codepoints[0]));               \
        for (const char *ch = actual_utf8_; *ch != '\0'; ++ch) {        \
          printf("0x%X ",                                               \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                               \
        printf("!= ");                                                  \
        for (const char *ch = wanted_utf8_; *ch != '\0'; ++ch) {        \
          printf("0x%X ",                                               \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                               \
        printf("\n");                                                   \
        m_success = false;                                              \
      }                                                                 \
    }                                                                   \
  } while (0)
143
144void TestExecutor::testGetUtf8FromUtf32() {
145  printf("testGetUtf8FromUtf32()\n");
146  String8 string8;
147
148  EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
149  // Armenian capital letter AYB (2 bytes in UTF8)
150  EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
151  // Japanese 'a' (3 bytes in UTF8)
152  EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
153  // Kanji
154  EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
155  // PUA (4 byets in UTF8)
156  EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
157  EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
158}
159
// Runs GetNormalizedString() on the UTF-8 string |src| and compares the
// produced string with |expected|.
//
// If the call reports failure, or the output differs, the current test is
// marked as failed (m_success = false); on a mismatch both byte sequences
// are dumped in hex. Bytes are printed as unsigned values so that bytes
// >= 0x80 appear as two hex digits regardless of whether plain `char` is
// signed on the platform. The buffer returned through `dst` is released with
// free() after a successful call.
//
// Must be expanded where `char *dst`, `size_t len` and `m_success` are in
// scope. Each argument is evaluated exactly once.
#define EXPECT_EQ_UTF8_UTF8(src, expected)                              \
  do {                                                                  \
    const char* const wanted_utf8_ = (expected);                        \
    if (!GetNormalizedString(src, &dst, &len)) {                        \
      printf("GetNormalizedString() returned false.\n");                \
      m_success = false;                                                \
    } else {                                                            \
      if (strcmp(dst, wanted_utf8_) != 0) {                             \
        for (const char *ch = dst; *ch != '\0'; ++ch) {                 \
          printf("0x%X ",                                               \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                               \
        printf("!= ");                                                  \
        for (const char *ch = wanted_utf8_; *ch != '\0'; ++ch) {        \
          printf("0x%X ",                                               \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                               \
        printf("\n");                                                   \
        m_success = false;                                              \
      }                                                                 \
      free(dst);                                                        \
    }                                                                   \
  } while (0)
180
181void TestExecutor::testGetNormalizedString() {
182  printf("testGetNormalizedString()\n");
183  char *dst;
184  size_t len;
185
186  // halfwidth alphabets/symbols -> keep it as is.
187  EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()",
188                      "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()");
189  EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/",
190                      "abcdefghijklmnopqrstuvwxyz[]{}\\@/");
191
192  // halfwidth/fullwidth-katakana -> hiragana
193  EXPECT_EQ_UTF8_UTF8(
194      "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
195      "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
196
197  // whitespace -> keep it as is.
198  EXPECT_EQ_UTF8_UTF8("    \t", "    \t");
199}
200
201void TestExecutor::testLongString() {
202  printf("testLongString()\n");
203  char * dst;
204  size_t len;
205  EXPECT_EQ_UTF8_UTF8("Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttgggggggggggggggggggggggggggggggggggggggbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
206      "Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttggggggggggggggggggggggggggggggggggg");
207}
208
209
210int main() {
211  TestExecutor executor;
212  if(executor.DoAllTests()) {
213    return 0;
214  } else {
215    return 1;
216  }
217}
218