1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/sys_string_conversions.h"
6
7#include <wchar.h>
8
9#include "base/strings/string_piece.h"
10#include "base/strings/utf_string_conversions.h"
11
12namespace base {
13
14std::string SysWideToUTF8(const std::wstring& wide) {
15  // In theory this should be using the system-provided conversion rather
16  // than our ICU, but this will do for now.
17  return WideToUTF8(wide);
18}
19std::wstring SysUTF8ToWide(const StringPiece& utf8) {
20  // In theory this should be using the system-provided conversion rather
21  // than our ICU, but this will do for now.
22  std::wstring out;
23  UTF8ToWide(utf8.data(), utf8.size(), &out);
24  return out;
25}
26
27#if defined(OS_CHROMEOS) || defined(OS_ANDROID)
28// TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
29// support and a better understanding of what calls these routines.
30
// ChromeOS always runs in a UTF-8 locale.
32std::string SysWideToNativeMB(const std::wstring& wide) {
33  return WideToUTF8(wide);
34}
35
36std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
37  return SysUTF8ToWide(native_mb);
38}
39
40#else
41
// Converts |wide| to a multi-byte string, one wide character at a time, using
// wcrtomb(). The conversion runs in two passes: the first measures how many
// bytes are needed, the second writes them into a string of exactly that size.
//
// Returns an empty string if |wide| is empty or if wcrtomb() reports an error
// for any character. Embedded L'\0' characters are emitted as a single zero
// byte each.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);
  mbstate_t state;

  // Pass 1: count the bytes the converted string will occupy, without keeping
  // any of the converted output.
  memset(&state, 0, sizeof(state));
  size_t byte_count = 0;
  for (size_t in_pos = 0; in_pos < wide.size(); ++in_pos) {
    const wchar_t wc = wide[in_pos];
    // wcrtomb() does not compute the output length when given a NULL output
    // pointer, so convert into a throwaway buffer instead.
    char scratch[16];
    // L'\0' is never passed to wcrtomb(), which treats it specially; it is
    // accounted for by hand as a zero-length result.
    const size_t written = wc ? wcrtomb(scratch, wc, &state) : 0;
    if (written == kConversionError)
      return std::string();
    // A zero-length result stands for an embedded null and takes one byte.
    byte_count += written ? written : 1;
  }

  if (byte_count == 0)
    return std::string();

  // Zero-filled, so slots for embedded nulls need no explicit write below.
  std::string out(byte_count, '\0');

  // Pass 2: walk the input again from a fresh conversion state, this time
  // writing each character's bytes directly into |out| at |out_pos|.
  memset(&state, 0, sizeof(state));
  size_t out_pos = 0;
  for (size_t in_pos = 0; in_pos < wide.size(); ++in_pos) {
    const wchar_t wc = wide[in_pos];
    // As in pass 1, keep L'\0' away from wcrtomb().
    const size_t written = wc ? wcrtomb(&out[out_pos], wc, &state) : 0;
    if (written == kConversionError)
      return std::string();
    // For an embedded null just step over the already-zeroed byte.
    out_pos += written ? written : 1;
  }

  return out;
}
101
102std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
103  mbstate_t ps;
104
105  // Calculate the number of wide characters.  We walk through the string
106  // without writing the output, counting the number of wide characters.
107  size_t num_out_chars = 0;
108  memset(&ps, 0, sizeof(ps));
109  for (size_t i = 0; i < native_mb.size(); ) {
110    const char* src = native_mb.data() + i;
111    size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
112    switch (res) {
113      // Handle any errors and return an empty string.
114      case static_cast<size_t>(-2):
115      case static_cast<size_t>(-1):
116        return std::wstring();
117        break;
118      case 0:
119        // We hit an embedded null byte, keep going.
120        i += 1;  // Fall through.
121      default:
122        i += res;
123        ++num_out_chars;
124        break;
125    }
126  }
127
128  if (num_out_chars == 0)
129    return std::wstring();
130
131  std::wstring out;
132  out.resize(num_out_chars);
133
134  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
135  // We walk the input string again, with |i| tracking the index of the
136  // multi-byte input, and |j| tracking the wide output.
137  for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
138    const char* src = native_mb.data() + i;
139    wchar_t* dst = &out[j];
140    size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
141    switch (res) {
142      // Handle any errors and return an empty string.
143      case static_cast<size_t>(-2):
144      case static_cast<size_t>(-1):
145        return std::wstring();
146        break;
147      case 0:
148        i += 1;  // Skip null byte.
149        break;
150      default:
151        i += res;
152        break;
153    }
154  }
155
156  return out;
157}
158
#endif  // defined(OS_CHROMEOS) || defined(OS_ANDROID)
160
161}  // namespace base
162