1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/sys_string_conversions.h"
6
7#include <stddef.h>
8#include <wchar.h>
9
10#include "base/strings/string_piece.h"
11#include "base/strings/utf_string_conversions.h"
12#include "build/build_config.h"
13
14namespace base {
15
16std::string SysWideToUTF8(const std::wstring& wide) {
17  // In theory this should be using the system-provided conversion rather
18  // than our ICU, but this will do for now.
19  return WideToUTF8(wide);
20}
21std::wstring SysUTF8ToWide(const StringPiece& utf8) {
22  // In theory this should be using the system-provided conversion rather
23  // than our ICU, but this will do for now.
24  std::wstring out;
25  UTF8ToWide(utf8.data(), utf8.size(), &out);
26  return out;
27}
28
29#if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
30// TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
31// support and a better understanding of what calls these routines.
32
33std::string SysWideToNativeMB(const std::wstring& wide) {
34  return WideToUTF8(wide);
35}
36
37std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
38  return SysUTF8ToWide(native_mb);
39}
40
41#else
42
// Converts |wide| to the multi-byte encoding selected by the current C locale,
// one wide character at a time via wcrtomb().
//
// Embedded NUL wide characters are carried through as single NUL bytes.
// Returns an empty string when |wide| is empty or when wcrtomb() reports that
// a character cannot be converted.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // A value-initialized (all-zero) mbstate_t is the initial conversion state.
  mbstate_t ps = mbstate_t();

  std::string out;
  // Every input character produces at least one output byte, so this is a
  // lower bound that avoids most reallocations.
  out.reserve(wide.size());

  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];

    // wcrtomb() cannot be asked for a length without an output buffer, so each
    // character is converted into a scratch buffer and then appended.
    // NOTE(review): 16 bytes is assumed to be >= MB_CUR_MAX for every
    // supported locale.
    char buf[16];

    // Skip NULs to avoid wcrtomb's special handling of them.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;

    // Any conversion error aborts the whole conversion.
    if (res == static_cast<size_t>(-1))
      return std::string();

    if (res == 0) {
      // We hit an embedded null; emit it as a single NUL byte and keep going.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
102
103std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
104  mbstate_t ps;
105
106  // Calculate the number of wide characters.  We walk through the string
107  // without writing the output, counting the number of wide characters.
108  size_t num_out_chars = 0;
109  memset(&ps, 0, sizeof(ps));
110  for (size_t i = 0; i < native_mb.size(); ) {
111    const char* src = native_mb.data() + i;
112    size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
113    switch (res) {
114      // Handle any errors and return an empty string.
115      case static_cast<size_t>(-2):
116      case static_cast<size_t>(-1):
117        return std::wstring();
118        break;
119      case 0:
120        // We hit an embedded null byte, keep going.
121        i += 1;  // Fall through.
122      default:
123        i += res;
124        ++num_out_chars;
125        break;
126    }
127  }
128
129  if (num_out_chars == 0)
130    return std::wstring();
131
132  std::wstring out;
133  out.resize(num_out_chars);
134
135  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
136  // We walk the input string again, with |i| tracking the index of the
137  // multi-byte input, and |j| tracking the wide output.
138  for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
139    const char* src = native_mb.data() + i;
140    wchar_t* dst = &out[j];
141    size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
142    switch (res) {
143      // Handle any errors and return an empty string.
144      case static_cast<size_t>(-2):
145      case static_cast<size_t>(-1):
146        return std::wstring();
147        break;
148      case 0:
149        i += 1;  // Skip null byte.
150        break;
151      default:
152        i += res;
153        break;
154    }
155  }
156
157  return out;
158}
159
#endif  // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
161
162}  // namespace base
163