test_char_set.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ppapi/tests/test_char_set.h"
6
7#include "ppapi/c/dev/ppb_char_set_dev.h"
8#include "ppapi/c/trusted/ppb_char_set_trusted.h"
9#include "ppapi/cpp/dev/memory_dev.h"
10#include "ppapi/cpp/module.h"
11#include "ppapi/tests/testing_instance.h"
12
// Hands the "CharSet" test case to REGISTER_TEST_CASE. The macro is defined
// outside this file; presumably it makes TestCharSet discoverable by the test
// harness under that name (confirm against the macro definition).
REGISTER_TEST_CASE(CharSet);
14
15TestCharSet::TestCharSet(TestingInstance* instance)
16    : TestCase(instance),
17      char_set_interface_(NULL) {
18}
19
20bool TestCharSet::Init() {
21  char_set_interface_ = static_cast<const PPB_CharSet_Dev*>(
22      pp::Module::Get()->GetBrowserInterface(PPB_CHAR_SET_DEV_INTERFACE));
23  char_set_trusted_interface_ = static_cast<const PPB_CharSet_Trusted*>(
24      pp::Module::Get()->GetBrowserInterface(PPB_CHARSET_TRUSTED_INTERFACE));
25  return char_set_interface_ && char_set_trusted_interface_;
26}
27
// Passes each test name, together with |filter|, to RUN_TEST in a fixed order:
// the deprecated and trusted UTF16ToCharSet tests, then the deprecated and
// trusted CharSetToUTF16 tests, then GetDefaultCharSet. RUN_TEST is defined
// outside this file; presumably it invokes the matching Test* method when
// |filter| selects it (confirm against the macro definition).
void TestCharSet::RunTests(const std::string& filter) {
  RUN_TEST(UTF16ToCharSetDeprecated, filter);
  RUN_TEST(UTF16ToCharSet, filter);
  RUN_TEST(CharSetToUTF16Deprecated, filter);
  RUN_TEST(CharSetToUTF16, filter);
  RUN_TEST(GetDefaultCharSet, filter);
}
35
36// TODO(brettw) remove this when the old interface is removed.
37std::string TestCharSet::TestUTF16ToCharSetDeprecated() {
38  // Empty string.
39  std::vector<uint16_t> utf16;
40  utf16.push_back(0);
41  uint32_t utf8result_len = 0;
42  pp::Memory_Dev memory;
43  char* utf8result = char_set_interface_->UTF16ToCharSet(
44      instance_->pp_instance(), &utf16[0], 0, "latin1",
45      PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
46  ASSERT_TRUE(utf8result);
47  ASSERT_TRUE(utf8result[0] == 0);
48  ASSERT_TRUE(utf8result_len == 0);
49  memory.MemFree(utf8result);
50
51  // Try round-tripping some English & Chinese from UTF-8 through UTF-16
52  std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
53  utf16 = UTF8ToUTF16(utf8source);
54  utf8result = char_set_interface_->UTF16ToCharSet(
55      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
56      "Utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len);
57  ASSERT_TRUE(utf8source == std::string(utf8result, utf8result_len));
58  memory.MemFree(utf8result);
59
60  // Test an un-encodable character with various modes.
61  utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i");
62
63  // Fail mode.
64  utf8result_len = 1234;  // Test that this gets 0'ed on failure.
65  utf8result = char_set_interface_->UTF16ToCharSet(
66      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
67      "latin1", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len);
68  ASSERT_TRUE(utf8result_len == 0);
69  ASSERT_TRUE(utf8result == NULL);
70
71  // Skip mode.
72  utf8result = char_set_interface_->UTF16ToCharSet(
73      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
74      "latin1", PP_CHARSET_CONVERSIONERROR_SKIP, &utf8result_len);
75  ASSERT_TRUE(utf8result_len == 2);
76  ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == 'i' &&
77              utf8result[2] == 0);
78  memory.MemFree(utf8result);
79
80  // Substitute mode.
81  utf8result = char_set_interface_->UTF16ToCharSet(
82      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
83      "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
84  ASSERT_TRUE(utf8result_len == 3);
85  ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == '?' &&
86              utf8result[2] == 'i' && utf8result[3] == 0);
87  memory.MemFree(utf8result);
88
89  // Try some invalid input encoding.
90  utf16.clear();
91  utf16.push_back(0xD800);  // High surrogate.
92  utf16.push_back('A');  // Not a low surrogate.
93  utf8result = char_set_interface_->UTF16ToCharSet(
94      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
95      "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
96  ASSERT_TRUE(utf8result_len == 2);
97  ASSERT_TRUE(utf8result[0] == '?' && utf8result[1] == 'A' &&
98              utf8result[2] == 0);
99  memory.MemFree(utf8result);
100
101  // Invalid encoding name.
102  utf8result = char_set_interface_->UTF16ToCharSet(
103      instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()),
104      "poopiepants", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len);
105  ASSERT_TRUE(!utf8result);
106  ASSERT_TRUE(utf8result_len == 0);
107
108  PASS();
109}
110
111std::string TestCharSet::TestUTF16ToCharSet() {
112  // Empty string.
113  std::vector<uint16_t> utf16;
114  utf16.push_back(0);
115  std::string output_buffer;
116  uint32_t utf8result_len = static_cast<uint32_t>(output_buffer.size());
117  PP_Bool result = char_set_trusted_interface_->UTF16ToCharSet(
118      &utf16[0], 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
119      &output_buffer[0], &utf8result_len);
120  ASSERT_TRUE(result == PP_TRUE);
121  ASSERT_TRUE(utf8result_len == 0);
122
123  // No output buffer returns length of string.
124  utf16 = UTF8ToUTF16("hello");
125  utf8result_len = 0;
126  result = char_set_trusted_interface_->UTF16ToCharSet(
127      &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1",
128      PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, NULL, &utf8result_len);
129  ASSERT_TRUE(result == PP_TRUE);
130  ASSERT_TRUE(utf8result_len == 5);
131
132  // Giving too small of a buffer just fills in that many items and gives us
133  // the desired size.
134  output_buffer.resize(100);
135  utf8result_len = 2;
136  output_buffer[utf8result_len] = '$';  // Barrier character.
137  result = char_set_trusted_interface_->UTF16ToCharSet(
138      &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1",
139      PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
140      &output_buffer[0], &utf8result_len);
141  ASSERT_TRUE(result == PP_TRUE);
142  ASSERT_TRUE(utf8result_len == 5);
143  ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'e' &&
144              output_buffer[2] == '$');
145
146  // Try round-tripping some English & Chinese from UTF-8 through UTF-16
147  std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
148  utf16 = UTF8ToUTF16(utf8source);
149  output_buffer.resize(100);
150  utf8result_len = static_cast<uint32_t>(output_buffer.size());
151  result = char_set_trusted_interface_->UTF16ToCharSet(
152      &utf16[0], static_cast<uint32_t>(utf16.size()),
153      "Utf-8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
154      &output_buffer[0], &utf8result_len);
155  ASSERT_TRUE(result == PP_TRUE);
156  output_buffer.resize(utf8result_len);
157  ASSERT_TRUE(utf8source == output_buffer);
158
159  // Test an un-encodable character with various modes.
160  utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i");
161
162  // Fail mode, size should get 0'ed on failure.
163  output_buffer.resize(100);
164  utf8result_len = static_cast<uint32_t>(output_buffer.size());
165  result = char_set_trusted_interface_->UTF16ToCharSet(
166      &utf16[0], static_cast<uint32_t>(utf16.size()),
167      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
168      &output_buffer[0], &utf8result_len);
169  ASSERT_TRUE(result == PP_FALSE);
170  ASSERT_TRUE(utf8result_len == 0);
171
172  // Skip mode.
173  output_buffer.resize(100);
174  utf8result_len = static_cast<uint32_t>(output_buffer.size());
175  result = char_set_trusted_interface_->UTF16ToCharSet(
176      &utf16[0], static_cast<uint32_t>(utf16.size()),
177      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP,
178      &output_buffer[0], &utf8result_len);
179  ASSERT_TRUE(result == PP_TRUE);
180  ASSERT_TRUE(utf8result_len == 2);
181  ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'i');
182
183  // Substitute mode.
184  output_buffer.resize(100);
185  utf8result_len = static_cast<uint32_t>(output_buffer.size());
186  result = char_set_trusted_interface_->UTF16ToCharSet(
187      &utf16[0], static_cast<uint32_t>(utf16.size()),
188      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
189      &output_buffer[0], &utf8result_len);
190  ASSERT_TRUE(utf8result_len == 3);
191  output_buffer.resize(utf8result_len);
192  ASSERT_TRUE(output_buffer == "h?i");
193
194  // Try some invalid input encoding.
195  output_buffer.resize(100);
196  utf8result_len = static_cast<uint32_t>(output_buffer.size());
197  utf16.clear();
198  utf16.push_back(0xD800);  // High surrogate.
199  utf16.push_back('A');  // Not a low surrogate.
200  result = char_set_trusted_interface_->UTF16ToCharSet(
201      &utf16[0], static_cast<uint32_t>(utf16.size()),
202      "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
203      &output_buffer[0], &utf8result_len);
204  ASSERT_TRUE(utf8result_len == 2);
205  ASSERT_TRUE(output_buffer[0] == '?' && output_buffer[1] == 'A');
206
207  // Invalid encoding name.
208  output_buffer.resize(100);
209  utf8result_len = static_cast<uint32_t>(output_buffer.size());
210  result = char_set_trusted_interface_->UTF16ToCharSet(
211      &utf16[0], static_cast<uint32_t>(utf16.size()),
212      "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
213      &output_buffer[0], &utf8result_len);
214  ASSERT_TRUE(result == PP_FALSE);
215  ASSERT_TRUE(utf8result_len == 0);
216
217  PASS();
218}
219
220// TODO(brettw) remove this when the old interface is removed.
221std::string TestCharSet::TestCharSetToUTF16Deprecated() {
222  pp::Memory_Dev memory;
223
224  // Empty string.
225  uint32_t utf16result_len;
226  uint16_t* utf16result = char_set_interface_->CharSetToUTF16(
227      instance_->pp_instance(), "", 0, "latin1",
228      PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
229  ASSERT_TRUE(utf16result);
230  ASSERT_TRUE(utf16result_len == 0);
231  ASSERT_TRUE(utf16result[0] == 0);
232  memory.MemFree(utf16result);
233
234  // Basic Latin1.
235  char latin1[] = "H\xef";
236  utf16result = char_set_interface_->CharSetToUTF16(
237      instance_->pp_instance(), latin1, 2, "latin1",
238      PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
239  ASSERT_TRUE(utf16result);
240  ASSERT_TRUE(utf16result_len == 2);
241  ASSERT_TRUE(utf16result[0] == 'H' && utf16result[1] == 0xef &&
242              utf16result[2] == 0);
243  memory.MemFree(utf16result);
244
245  // Invalid input encoding with FAIL.
246  char badutf8[] = "A\xe4Z";
247  utf16result = char_set_interface_->CharSetToUTF16(
248      instance_->pp_instance(), badutf8, 3, "utf8",
249      PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len);
250  ASSERT_TRUE(!utf16result);
251  ASSERT_TRUE(utf16result_len == 0);
252  memory.MemFree(utf16result);
253
254  // Invalid input with SKIP.
255  utf16result = char_set_interface_->CharSetToUTF16(
256      instance_->pp_instance(), badutf8, 3, "utf8",
257      PP_CHARSET_CONVERSIONERROR_SKIP, &utf16result_len);
258  ASSERT_TRUE(utf16result);
259  ASSERT_TRUE(utf16result_len == 2);
260  ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 'Z' &&
261              utf16result[2] == 0);
262  memory.MemFree(utf16result);
263
264  // Invalid input with SUBSTITUTE.
265  utf16result = char_set_interface_->CharSetToUTF16(
266      instance_->pp_instance(), badutf8, 3, "utf8",
267      PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len);
268  ASSERT_TRUE(utf16result);
269  ASSERT_TRUE(utf16result_len == 3);
270  ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 0xFFFD &&
271              utf16result[2] == 'Z' && utf16result[3] == 0);
272  memory.MemFree(utf16result);
273
274  // Invalid encoding name.
275  utf16result = char_set_interface_->CharSetToUTF16(
276      instance_->pp_instance(), badutf8, 3, "poopiepants",
277      PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len);
278  ASSERT_TRUE(!utf16result);
279  ASSERT_TRUE(utf16result_len == 0);
280  memory.MemFree(utf16result);
281
282  PASS();
283}
284
285std::string TestCharSet::TestCharSetToUTF16() {
286  std::vector<uint16_t> output_buffer;
287  output_buffer.resize(100);
288
289  // Empty string.
290  output_buffer.resize(100);
291  uint32_t utf16result_len = static_cast<uint32_t>(output_buffer.size());
292  PP_Bool result = char_set_trusted_interface_->CharSetToUTF16(
293      "", 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
294      &output_buffer[0], &utf16result_len);
295  ASSERT_TRUE(result);
296  ASSERT_TRUE(utf16result_len == 0);
297  ASSERT_TRUE(output_buffer[0] == 0);
298
299  // Basic Latin1.
300  output_buffer.resize(100);
301  utf16result_len = static_cast<uint32_t>(output_buffer.size());
302  char latin1[] = "H\xef";
303  result = char_set_trusted_interface_->CharSetToUTF16(
304      latin1, 2, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
305      &output_buffer[0], &utf16result_len);
306  ASSERT_TRUE(result);
307  ASSERT_TRUE(utf16result_len == 2);
308  ASSERT_TRUE(output_buffer[0] == 'H' && output_buffer[1] == 0xef);
309
310  // Invalid input encoding with FAIL.
311  output_buffer.resize(100);
312  utf16result_len = static_cast<uint32_t>(output_buffer.size());
313  char badutf8[] = "A\xe4Z";
314  result = char_set_trusted_interface_->CharSetToUTF16(
315      badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL,
316      &output_buffer[0], &utf16result_len);
317  ASSERT_TRUE(!result);
318  ASSERT_TRUE(utf16result_len == 0);
319
320  // Invalid input with SKIP.
321  output_buffer.resize(100);
322  utf16result_len = static_cast<uint32_t>(output_buffer.size());
323  result = char_set_trusted_interface_->CharSetToUTF16(
324      badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP,
325      &output_buffer[0], &utf16result_len);
326  ASSERT_TRUE(result);
327  ASSERT_TRUE(utf16result_len == 2);
328  ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 'Z');
329
330  // Invalid input with SUBSTITUTE.
331  output_buffer.resize(100);
332  utf16result_len = static_cast<uint32_t>(output_buffer.size());
333  result = char_set_trusted_interface_->CharSetToUTF16(
334      badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
335      &output_buffer[0], &utf16result_len);
336  ASSERT_TRUE(result);
337  ASSERT_TRUE(utf16result_len == 3);
338  ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 0xFFFD &&
339              output_buffer[2] == 'Z');
340
341  // Invalid encoding name.
342  output_buffer.resize(100);
343  utf16result_len = static_cast<uint32_t>(output_buffer.size());
344  result = char_set_trusted_interface_->CharSetToUTF16(
345      badutf8, 3, "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE,
346      &output_buffer[0], &utf16result_len);
347  ASSERT_TRUE(!result);
348  ASSERT_TRUE(utf16result_len == 0);
349
350  PASS();
351}
352
353std::string TestCharSet::TestGetDefaultCharSet() {
354  // Test invalid instance.
355  pp::Var result(pp::PASS_REF, char_set_interface_->GetDefaultCharSet(0));
356  ASSERT_TRUE(result.is_undefined());
357
358  // Just make sure the default char set is a nonempty string.
359  result = pp::Var(pp::PASS_REF,
360      char_set_interface_->GetDefaultCharSet(instance_->pp_instance()));
361  ASSERT_TRUE(result.is_string());
362  ASSERT_FALSE(result.AsString().empty());
363
364  PASS();
365}
366
367std::vector<uint16_t> TestCharSet::UTF8ToUTF16(const std::string& utf8) {
368  uint32_t result_len = 0;
369  uint16_t* result = char_set_interface_->CharSetToUTF16(
370      instance_->pp_instance(), utf8.c_str(),
371      static_cast<uint32_t>(utf8.size()),
372      "utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &result_len);
373
374  std::vector<uint16_t> result_vector;
375  if (!result)
376    return result_vector;
377
378  result_vector.assign(result, &result[result_len]);
379  pp::Memory_Dev memory;
380  memory.MemFree(result);
381  return result_vector;
382}
383