1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ppapi/tests/test_char_set.h"
6
7#include "ppapi/c/dev/ppb_char_set_dev.h"
8#include "ppapi/c/trusted/ppb_char_set_trusted.h"
9#include "ppapi/cpp/dev/memory_dev.h"
10#include "ppapi/cpp/module.h"
11#include "ppapi/tests/testing_instance.h"
12
13REGISTER_TEST_CASE(CharSet);
14
15TestCharSet::TestCharSet(TestingInstance* instance)
16    : TestCase(instance),
17      char_set_interface_(NULL) {
18}
19
20bool TestCharSet::Init() {
21  char_set_interface_ = static_cast<const PPB_CharSet_Dev*>(
22      pp::Module::Get()->GetBrowserInterface(PPB_CHAR_SET_DEV_INTERFACE));
23  char_set_trusted_interface_ = static_cast<const PPB_CharSet_Trusted*>(
24      pp::Module::Get()->GetBrowserInterface(PPB_CHARSET_TRUSTED_INTERFACE));
25  return char_set_interface_ && char_set_trusted_interface_;
26}
27
28void TestCharSet::RunTests(const std::string& filter) {
29  RUN_TEST(UTF16ToCharSetDeprecated, filter);
30  RUN_TEST(UTF16ToCharSet, filter);
31  RUN_TEST(CharSetToUTF16Deprecated, filter);
32  RUN_TEST(CharSetToUTF16, filter);
33  RUN_TEST(GetDefaultCharSet, filter);
34}
35
36// TODO(brettw) remove this when the old interface is removed.
37std::string TestCharSet::TestUTF16ToCharSetDeprecated() {
38  // Empty string.
39  std::vector<uint16_t> utf16;
40  utf16.push_back(0);
41  uint32_t utf8result_len = 0;
42  pp::Memory_Dev memory;
43  char* utf8result = char_set_interface_->UTF16ToCharSet(
44      instance_->pp_instance(), &utf16[0], 0, "latin1",
45      PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
46  ASSERT_TRUE(utf8result);
47  ASSERT_TRUE(utf8result[0] == 0);
48  ASSERT_TRUE(utf8result_len == 0);
49  memory.MemFree(utf8result);
50
51  // Try round-tripping some English & Chinese from UTF-8 through UTF-16
52  std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
53  utf16 = UTF8ToUTF16(utf8source);
54  utf8result = char_set_interface_->UTF16ToCharSet(
55      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
56      "Utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len);
57  ASSERT_TRUE(utf8source == std::string(utf8result, utf8result_len));
58  memory.MemFree(utf8result);
59
60  // Test an un-encodable character with various modes.
61  utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i");
62
63  // Fail mode.
64  utf8result_len = 1234;  // Test that this gets 0'ed on failure.
65  utf8result = char_set_interface_->UTF16ToCharSet(
66      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
67      "latin1", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len);
68  ASSERT_TRUE(utf8result_len == 0);
69  ASSERT_TRUE(utf8result == NULL);
70
71  // Skip mode.
72  utf8result = char_set_interface_->UTF16ToCharSet(
73      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
74      "latin1", PP_CHARSET_CONVERSIONERROR_SKIP, &utf8result_len);
75  ASSERT_TRUE(utf8result_len == 2);
76  ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == 'i' &&
77              utf8result[2] == 0);
78  memory.MemFree(utf8result);
79
80  // Substitute mode.
81  utf8result = char_set_interface_->UTF16ToCharSet(
82      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
83      "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
84  ASSERT_TRUE(utf8result_len == 3);
85  ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == '?' &&
86              utf8result[2] == 'i' && utf8result[3] == 0);
87  memory.MemFree(utf8result);
88
89  // Try some invalid input encoding.
90  utf16.clear();
91  utf16.push_back(0xD800);  // High surrogate.
92  utf16.push_back('A');  // Not a low surrogate.
93  utf8result = char_set_interface_->UTF16ToCharSet(
94      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
95      "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
96  ASSERT_TRUE(utf8result_len == 2);
97  ASSERT_TRUE(utf8result[0] == '?' && utf8result[1] == 'A' &&
98              utf8result[2] == 0);
99  memory.MemFree(utf8result);
100
101  // Invalid encoding name.
102  utf8result = char_set_interface_->UTF16ToCharSet(
103      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
104      "poopiepants", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
105  ASSERT_TRUE(!utf8result);
106  ASSERT_TRUE(utf8result_len == 0);
107
108  PASS();
109}
110
111std::string TestCharSet::TestUTF16ToCharSet() {
112  // Empty string.
113  std::vector<uint16_t> utf16;
114  utf16.push_back(0);
115  std::string output_buffer;
116  output_buffer.resize(1);
117  uint32_t utf8result_len = 0;
118  PP_Bool result = char_set_trusted_interface_->UTF16ToCharSet(
119      &utf16[0], 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
120      &output_buffer[0], &utf8result_len);
121  ASSERT_TRUE(result == PP_TRUE);
122  ASSERT_TRUE(utf8result_len == 0);
123
124  // No output buffer returns length of string.
125  utf16 = UTF8ToUTF16("hello");
126  utf8result_len = 0;
127  result = char_set_trusted_interface_->UTF16ToCharSet(
128      &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1",
129      PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, NULL, &utf8result_len);
130  ASSERT_TRUE(result == PP_TRUE);
131  ASSERT_TRUE(utf8result_len == 5);
132
133  // Giving too small of a buffer just fills in that many items and gives us
134  // the desired size.
135  output_buffer.resize(100);
136  utf8result_len = 2;
137  output_buffer[utf8result_len] = '$';  // Barrier character.
138  result = char_set_trusted_interface_->UTF16ToCharSet(
139      &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1",
140      PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
141      &output_buffer[0], &utf8result_len);
142  ASSERT_TRUE(result == PP_TRUE);
143  ASSERT_TRUE(utf8result_len == 5);
144  ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'e' &&
145              output_buffer[2] == '$');
146
147  // Try round-tripping some English & Chinese from UTF-8 through UTF-16
148  std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
149  utf16 = UTF8ToUTF16(utf8source);
150  output_buffer.resize(100);
151  utf8result_len = static_cast<uint32_t>(output_buffer.size());
152  result = char_set_trusted_interface_->UTF16ToCharSet(
153      &utf16[0], static_cast<uint32_t>(utf16.size()),
154      "Utf-8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
155      &output_buffer[0], &utf8result_len);
156  ASSERT_TRUE(result == PP_TRUE);
157  output_buffer.resize(utf8result_len);
158  ASSERT_TRUE(utf8source == output_buffer);
159
160  // Test an un-encodable character with various modes.
161  utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i");
162
163  // Fail mode, size should get 0'ed on failure.
164  output_buffer.resize(100);
165  utf8result_len = static_cast<uint32_t>(output_buffer.size());
166  result = char_set_trusted_interface_->UTF16ToCharSet(
167      &utf16[0], static_cast<uint32_t>(utf16.size()),
168      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
169      &output_buffer[0], &utf8result_len);
170  ASSERT_TRUE(result == PP_FALSE);
171  ASSERT_TRUE(utf8result_len == 0);
172
173  // Skip mode.
174  output_buffer.resize(100);
175  utf8result_len = static_cast<uint32_t>(output_buffer.size());
176  result = char_set_trusted_interface_->UTF16ToCharSet(
177      &utf16[0], static_cast<uint32_t>(utf16.size()),
178      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP,
179      &output_buffer[0], &utf8result_len);
180  ASSERT_TRUE(result == PP_TRUE);
181  ASSERT_TRUE(utf8result_len == 2);
182  ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'i');
183
184  // Substitute mode.
185  output_buffer.resize(100);
186  utf8result_len = static_cast<uint32_t>(output_buffer.size());
187  result = char_set_trusted_interface_->UTF16ToCharSet(
188      &utf16[0], static_cast<uint32_t>(utf16.size()),
189      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
190      &output_buffer[0], &utf8result_len);
191  ASSERT_TRUE(result == PP_TRUE);
192  ASSERT_TRUE(utf8result_len == 3);
193  output_buffer.resize(utf8result_len);
194  ASSERT_TRUE(output_buffer == "h?i");
195
196  // Try some invalid input encoding.
197  output_buffer.resize(100);
198  utf8result_len = static_cast<uint32_t>(output_buffer.size());
199  utf16.clear();
200  utf16.push_back(0xD800);  // High surrogate.
201  utf16.push_back('A');  // Not a low surrogate.
202  result = char_set_trusted_interface_->UTF16ToCharSet(
203      &utf16[0], static_cast<uint32_t>(utf16.size()),
204      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
205      &output_buffer[0], &utf8result_len);
206  ASSERT_TRUE(result == PP_TRUE);
207  ASSERT_TRUE(utf8result_len == 2);
208  ASSERT_TRUE(output_buffer[0] == '?' && output_buffer[1] == 'A');
209
210  // Invalid encoding name.
211  output_buffer.resize(100);
212  utf8result_len = static_cast<uint32_t>(output_buffer.size());
213  result = char_set_trusted_interface_->UTF16ToCharSet(
214      &utf16[0], static_cast<uint32_t>(utf16.size()),
215      "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
216      &output_buffer[0], &utf8result_len);
217  ASSERT_TRUE(result == PP_FALSE);
218  ASSERT_TRUE(utf8result_len == 0);
219
220  PASS();
221}
222
223// TODO(brettw) remove this when the old interface is removed.
224std::string TestCharSet::TestCharSetToUTF16Deprecated() {
225  pp::Memory_Dev memory;
226
227  // Empty string.
228  uint32_t utf16result_len;
229  uint16_t* utf16result = char_set_interface_->CharSetToUTF16(
230      instance_->pp_instance(), "", 0, "latin1",
231      PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
232  ASSERT_TRUE(utf16result);
233  ASSERT_TRUE(utf16result_len == 0);
234  ASSERT_TRUE(utf16result[0] == 0);
235  memory.MemFree(utf16result);
236
237  // Basic Latin1.
238  char latin1[] = "H\xef";
239  utf16result = char_set_interface_->CharSetToUTF16(
240      instance_->pp_instance(), latin1, 2, "latin1",
241      PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
242  ASSERT_TRUE(utf16result);
243  ASSERT_TRUE(utf16result_len == 2);
244  ASSERT_TRUE(utf16result[0] == 'H' && utf16result[1] == 0xef &&
245              utf16result[2] == 0);
246  memory.MemFree(utf16result);
247
248  // Invalid input encoding with FAIL.
249  char badutf8[] = "A\xe4Z";
250  utf16result = char_set_interface_->CharSetToUTF16(
251      instance_->pp_instance(), badutf8, 3, "utf8",
252      PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
253  ASSERT_TRUE(!utf16result);
254  ASSERT_TRUE(utf16result_len == 0);
255  memory.MemFree(utf16result);
256
257  // Invalid input with SKIP.
258  utf16result = char_set_interface_->CharSetToUTF16(
259      instance_->pp_instance(), badutf8, 3, "utf8",
260      PP_CHARSET_CONVERSIONERROR_SKIP, &utf16result_len);
261  ASSERT_TRUE(utf16result);
262  ASSERT_TRUE(utf16result_len == 2);
263  ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 'Z' &&
264              utf16result[2] == 0);
265  memory.MemFree(utf16result);
266
267  // Invalid input with SUBSTITUTE.
268  utf16result = char_set_interface_->CharSetToUTF16(
269      instance_->pp_instance(), badutf8, 3, "utf8",
270      PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len);
271  ASSERT_TRUE(utf16result);
272  ASSERT_TRUE(utf16result_len == 3);
273  ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 0xFFFD &&
274              utf16result[2] == 'Z' && utf16result[3] == 0);
275  memory.MemFree(utf16result);
276
277  // Invalid encoding name.
278  utf16result = char_set_interface_->CharSetToUTF16(
279      instance_->pp_instance(), badutf8, 3, "poopiepants",
280      PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len);
281  ASSERT_TRUE(!utf16result);
282  ASSERT_TRUE(utf16result_len == 0);
283  memory.MemFree(utf16result);
284
285  PASS();
286}
287
288std::string TestCharSet::TestCharSetToUTF16() {
289  std::vector<uint16_t> output_buffer;
290  output_buffer.resize(100);
291
292  // Empty string.
293  output_buffer.resize(100);
294  uint32_t utf16result_len = static_cast<uint32_t>(output_buffer.size());
295  PP_Bool result = char_set_trusted_interface_->CharSetToUTF16(
296      "", 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
297      &output_buffer[0], &utf16result_len);
298  ASSERT_TRUE(result);
299  ASSERT_TRUE(utf16result_len == 0);
300  ASSERT_TRUE(output_buffer[0] == 0);
301
302  // Basic Latin1.
303  output_buffer.resize(100);
304  utf16result_len = static_cast<uint32_t>(output_buffer.size());
305  char latin1[] = "H\xef";
306  result = char_set_trusted_interface_->CharSetToUTF16(
307      latin1, 2, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
308      &output_buffer[0], &utf16result_len);
309  ASSERT_TRUE(result);
310  ASSERT_TRUE(utf16result_len == 2);
311  ASSERT_TRUE(output_buffer[0] == 'H' && output_buffer[1] == 0xef);
312
313  // Invalid input encoding with FAIL.
314  output_buffer.resize(100);
315  utf16result_len = static_cast<uint32_t>(output_buffer.size());
316  char badutf8[] = "A\xe4Z";
317  result = char_set_trusted_interface_->CharSetToUTF16(
318      badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
319      &output_buffer[0], &utf16result_len);
320  ASSERT_TRUE(!result);
321  ASSERT_TRUE(utf16result_len == 0);
322
323  // Invalid input with SKIP.
324  output_buffer.resize(100);
325  utf16result_len = static_cast<uint32_t>(output_buffer.size());
326  result = char_set_trusted_interface_->CharSetToUTF16(
327      badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP,
328      &output_buffer[0], &utf16result_len);
329  ASSERT_TRUE(result);
330  ASSERT_TRUE(utf16result_len == 2);
331  ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 'Z');
332
333  // Invalid input with SUBSTITUTE.
334  output_buffer.resize(100);
335  utf16result_len = static_cast<uint32_t>(output_buffer.size());
336  result = char_set_trusted_interface_->CharSetToUTF16(
337      badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
338      &output_buffer[0], &utf16result_len);
339  ASSERT_TRUE(result);
340  ASSERT_TRUE(utf16result_len == 3);
341  ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 0xFFFD &&
342              output_buffer[2] == 'Z');
343
344  // Invalid encoding name.
345  output_buffer.resize(100);
346  utf16result_len = static_cast<uint32_t>(output_buffer.size());
347  result = char_set_trusted_interface_->CharSetToUTF16(
348      badutf8, 3, "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
349      &output_buffer[0], &utf16result_len);
350  ASSERT_TRUE(!result);
351  ASSERT_TRUE(utf16result_len == 0);
352
353  PASS();
354}
355
356std::string TestCharSet::TestGetDefaultCharSet() {
357  // Test invalid instance.
358  pp::Var result(pp::PASS_REF, char_set_interface_->GetDefaultCharSet(0));
359  ASSERT_TRUE(result.is_undefined());
360
361  // Just make sure the default char set is a nonempty string.
362  result = pp::Var(pp::PASS_REF,
363      char_set_interface_->GetDefaultCharSet(instance_->pp_instance()));
364  ASSERT_TRUE(result.is_string());
365  ASSERT_FALSE(result.AsString().empty());
366
367  PASS();
368}
369
370std::vector<uint16_t> TestCharSet::UTF8ToUTF16(const std::string& utf8) {
371  uint32_t result_len = 0;
372  uint16_t* result = char_set_interface_->CharSetToUTF16(
373      instance_->pp_instance(), utf8.c_str(),
374      static_cast<uint32_t>(utf8.size()),
375      "utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &result_len);
376
377  std::vector<uint16_t> result_vector;
378  if (!result)
379    return result_vector;
380
381  result_vector.assign(result, &result[result_len]);
382  pp::Memory_Dev memory;
383  memory.MemFree(result);
384  return result_vector;
385}
386