1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ppapi/shared_impl/private/ppb_char_set_shared.h"
6
7#include <algorithm>
8
9#include "base/i18n/icu_string_conversions.h"
10#include "ppapi/c/dev/ppb_memory_dev.h"
11#include "ppapi/thunk/thunk.h"
12#include "third_party/icu/source/common/unicode/ucnv.h"
13#include "third_party/icu/source/common/unicode/ucnv_cb.h"
14#include "third_party/icu/source/common/unicode/ucnv_err.h"
15#include "third_party/icu/source/common/unicode/ustring.h"
16
17namespace ppapi {
18
19namespace {
20
21PP_CharSet_Trusted_ConversionError DeprecatedToConversionError(
22    PP_CharSet_ConversionError on_error) {
23  switch (on_error) {
24    case PP_CHARSET_CONVERSIONERROR_SKIP:
25      return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP;
26    case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE:
27      return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE;
28    case PP_CHARSET_CONVERSIONERROR_FAIL:
29    default:
30      return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL;
31  }
32}
33
34// Converts the given PP error handling behavior to the version in base,
35// placing the result in |*result| and returning true on success. Returns false
36// if the enum is invalid.
37bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error,
38                             base::OnStringConversionError::Type* result) {
39  switch (on_error) {
40    case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL:
41      *result = base::OnStringConversionError::FAIL;
42      return true;
43    case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP:
44      *result = base::OnStringConversionError::SKIP;
45      return true;
46    case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE:
47      *result = base::OnStringConversionError::SUBSTITUTE;
48      return true;
49    default:
50      return false;
51  }
52}
53
54}  // namespace
55
56// static
57// The "substitution" behavior of this function does not match the
58// implementation in base, so we partially duplicate the code from
59// icu_string_conversions.cc with the correct error handling setup required
60// by the PPAPI interface.
61char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated(
62    const uint16_t* utf16,
63    uint32_t utf16_len,
64    const char* output_char_set,
65    PP_CharSet_ConversionError deprecated_on_error,
66    uint32_t* output_length) {
67  *output_length = 0;
68  PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
69      deprecated_on_error);
70
71  // Compute required length.
72  uint32_t required_length = 0;
73  UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, NULL,
74                 &required_length);
75
76  // Our output is null terminated, so need one more byte.
77  char* ret_buf = static_cast<char*>(
78      thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length + 1));
79
80  // Do the conversion into the buffer.
81  PP_Bool result = UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error,
82                                  ret_buf, &required_length);
83  if (result == PP_FALSE) {
84    thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
85    return NULL;
86  }
87  ret_buf[required_length] = 0;  // Null terminate.
88  *output_length = required_length;
89  return ret_buf;
90}
91
92// static
93PP_Bool PPB_CharSet_Shared::UTF16ToCharSet(
94    const uint16_t utf16[],
95    uint32_t utf16_len,
96    const char* output_char_set,
97    PP_CharSet_Trusted_ConversionError on_error,
98    char* output_buffer,
99    uint32_t* output_length) {
100  if (!utf16 || !output_char_set || !output_length) {
101    *output_length = 0;
102    return PP_FALSE;
103  }
104
105  UErrorCode status = U_ZERO_ERROR;
106  UConverter* converter = ucnv_open(output_char_set, &status);
107  if (!U_SUCCESS(status)) {
108    *output_length = 0;
109    return PP_FALSE;
110  }
111
112  // Setup our error handler.
113  switch (on_error) {
114    case PP_CHARSET_CONVERSIONERROR_FAIL:
115      ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
116                            NULL, NULL, &status);
117      break;
118    case PP_CHARSET_CONVERSIONERROR_SKIP:
119      ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
120                            NULL, NULL, &status);
121      break;
122    case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: {
123      // ICU sets the substitution char for some character sets (like latin1)
124      // to be the ASCII "substitution character" (26). We want to use '?'
125      // instead for backwards-compat with Windows behavior.
126      char subst_chars[32];
127      int8_t subst_chars_len = 32;
128      ucnv_getSubstChars(converter, subst_chars, &subst_chars_len, &status);
129      if (subst_chars_len == 1 && subst_chars[0] == 26) {
130        // Override to the question mark character if possible. When using
131        // setSubstString, the input is a Unicode character. The function will
132        // try to convert it to the destination character set and fail if that
133        // can not be converted to the destination character set.
134        //
135        // We just ignore any failure. If the dest char set has no
136        // representation for '?', then we'll just stick to the ICU default
137        // substitution character.
138        UErrorCode subst_status = U_ZERO_ERROR;
139        UChar question_mark = '?';
140        ucnv_setSubstString(converter, &question_mark, 1, &subst_status);
141      }
142
143      ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
144                            NULL, NULL, &status);
145      break;
146    }
147    default:
148      *output_length = 0;
149      ucnv_close(converter);
150      return PP_FALSE;
151  }
152
153  // ucnv_fromUChars returns required size not including terminating null.
154  *output_length = static_cast<uint32_t>(ucnv_fromUChars(
155      converter, output_buffer, output_buffer ? *output_length : 0,
156      reinterpret_cast<const UChar*>(utf16), utf16_len, &status));
157
158  ucnv_close(converter);
159  if (status == U_BUFFER_OVERFLOW_ERROR) {
160    // Don't treat this as a fatal error since we need to return the string
161    // size.
162    return PP_TRUE;
163  } else if (!U_SUCCESS(status)) {
164    *output_length = 0;
165    return PP_FALSE;
166  }
167  return PP_TRUE;
168}
169
170// static
171uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated(
172    const char* input,
173    uint32_t input_len,
174    const char* input_char_set,
175    PP_CharSet_ConversionError deprecated_on_error,
176    uint32_t* output_length) {
177  *output_length = 0;
178  PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
179      deprecated_on_error);
180
181  // Compute required length.
182  uint32_t required_length = 0;
183  CharSetToUTF16(input, input_len, input_char_set, on_error, NULL,
184                 &required_length);
185
186  // Our output is null terminated, so need one more byte.
187  uint16_t* ret_buf = static_cast<uint16_t*>(
188      thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(
189          (required_length + 1) * sizeof(uint16_t)));
190
191  // Do the conversion into the buffer.
192  PP_Bool result = CharSetToUTF16(input, input_len, input_char_set, on_error,
193                                  ret_buf, &required_length);
194  if (result == PP_FALSE) {
195    thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
196    return NULL;
197  }
198  ret_buf[required_length] = 0;  // Null terminate.
199  *output_length = required_length;
200  return ret_buf;
201}
202
203PP_Bool PPB_CharSet_Shared::CharSetToUTF16(
204    const char* input,
205    uint32_t input_len,
206    const char* input_char_set,
207    PP_CharSet_Trusted_ConversionError on_error,
208    uint16_t* output_buffer,
209    uint32_t* output_utf16_length) {
210  if (!input || !input_char_set || !output_utf16_length) {
211    *output_utf16_length = 0;
212    return PP_FALSE;
213  }
214
215  base::OnStringConversionError::Type base_on_error;
216  if (!PPToBaseConversionError(on_error, &base_on_error)) {
217    *output_utf16_length = 0;
218    return PP_FALSE;  // Invalid enum value.
219  }
220
221  // We can convert this call to the implementation in base to avoid code
222  // duplication, although this does introduce an extra copy of the data.
223  base::string16 output;
224  if (!base::CodepageToUTF16(std::string(input, input_len), input_char_set,
225                             base_on_error, &output)) {
226    *output_utf16_length = 0;
227    return PP_FALSE;
228  }
229
230  if (output_buffer) {
231    memcpy(output_buffer, output.c_str(),
232           std::min(*output_utf16_length, static_cast<uint32_t>(output.size()))
233           * sizeof(uint16_t));
234  }
235  *output_utf16_length = static_cast<uint32_t>(output.size());
236  return PP_TRUE;
237}
238
239}  // namespace ppapi
240