// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/sys_string_conversions.h"
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <wchar.h>
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_piece.h"
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/utf_string_conversions.h"
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace base {
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string SysWideToUTF8(const std::wstring& wide) {
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // In theory this should be using the system-provided conversion rather
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // than our ICU, but this will do for now.
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return WideToUTF8(wide);
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring SysUTF8ToWide(const StringPiece& utf8) {
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // In theory this should be using the system-provided conversion rather
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // than our ICU, but this will do for now.
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring out;
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  UTF8ToWide(utf8.data(), utf8.size(), &out);
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return out;
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#if defined(OS_CHROMEOS)
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// ChromeOS always runs in UTF-8 locale.
30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::string SysWideToNativeMB(const std::wstring& wide) {
31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return WideToUTF8(wide);
32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::wstring SysNativeMBToWide(const StringPiece& native_mb) {
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return SysUTF8ToWide(native_mb);
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#else
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
// Converts |wide| to the native multi-byte encoding using wcrtomb().
//
// The conversion is done in two passes: the first measures how many output
// bytes are needed, the second writes them into a string of exactly that
// size. Embedded NUL characters are preserved; each one becomes a single
// zero byte and is never handed to wcrtomb(). If any character cannot be
// converted, an empty string is returned.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);
  mbstate_t state;

  // Pass 1: add up the number of bytes the multi-byte form will occupy.
  memset(&state, 0, sizeof(state));
  size_t total_bytes = 0;
  for (std::wstring::const_iterator it = wide.begin();
       it != wide.end(); ++it) {
    // wcrtomb() does not report the output length when given a NULL
    // destination, so convert into a throwaway buffer instead.
    char scratch[16];
    size_t produced = 0;
    // NUL is skipped here to avoid wcrtomb()'s special handling of it.
    if (*it != L'\0')
      produced = wcrtomb(scratch, *it, &state);

    if (produced == kConversionError)
      return std::string();

    // A zero result stands for an embedded NUL, which takes one byte.
    total_bytes += (produced == 0) ? 1 : produced;
  }

  if (total_bytes == 0)
    return std::string();

  // Zero-filled, so embedded NULs only need their slot skipped below.
  std::string result(total_bytes, '\0');

  // Pass 2: convert again from a fresh shift state, this time writing
  // directly into |result|. |read_pos| indexes the wide input and
  // |write_pos| the multi-byte output.
  memset(&state, 0, sizeof(state));
  size_t write_pos = 0;
  for (size_t read_pos = 0; read_pos < wide.size(); ++read_pos) {
    const wchar_t ch = wide[read_pos];
    if (ch == L'\0') {
      ++write_pos;  // The output byte is already zero.
      continue;
    }

    const size_t produced = wcrtomb(&result[write_pos], ch, &state);
    if (produced == kConversionError)
      return std::string();

    write_pos += (produced == 0) ? 1 : produced;
  }

  return result;
}
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring SysNativeMBToWide(const StringPiece& native_mb) {
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  mbstate_t ps;
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Calculate the number of wide characters.  We walk through the string
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // without writing the output, counting the number of wide characters.
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t num_out_chars = 0;
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  memset(&ps, 0, sizeof(ps));
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0; i < native_mb.size(); ) {
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* src = native_mb.data() + i;
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    switch (res) {
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // Handle any errors and return an empty string.
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case static_cast<size_t>(-2):
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case static_cast<size_t>(-1):
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return std::wstring();
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case 0:
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // We hit an embedded null byte, keep going.
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        i += 1;  // Fall through.
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      default:
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        i += res;
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        ++num_out_chars;
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (num_out_chars == 0)
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return std::wstring();
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring out;
130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  out.resize(num_out_chars);
131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // We walk the input string again, with |i| tracking the index of the
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // multi-byte input, and |j| tracking the wide output.
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char* src = native_mb.data() + i;
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    wchar_t* dst = &out[j];
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    switch (res) {
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // Handle any errors and return an empty string.
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case static_cast<size_t>(-2):
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case static_cast<size_t>(-1):
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return std::wstring();
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      case 0:
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        i += 1;  // Skip null byte.
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      default:
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        i += res;
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        break;
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return out;
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif  // OS_CHROMEOS
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace base
160