sys_string_conversions_linux.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include "base/sys_string_conversions.h"

#include <limits.h>
#include <string.h>
#include <wchar.h>

#include "base/string_piece.h"
#include "base/utf_string_conversions.h"
11
12namespace base {
13
14std::string SysWideToUTF8(const std::wstring& wide) {
15  // In theory this should be using the system-provided conversion rather
16  // than our ICU, but this will do for now.
17  return WideToUTF8(wide);
18}
19std::wstring SysUTF8ToWide(const StringPiece& utf8) {
20  // In theory this should be using the system-provided conversion rather
21  // than our ICU, but this will do for now.
22  std::wstring out;
23  UTF8ToWide(utf8.data(), utf8.size(), &out);
24  return out;
25}
26
27#if defined(OS_CHROMEOS)
28
29// ChromeOS always runs in UTF-8 locale.
30std::string SysWideToNativeMB(const std::wstring& wide) {
31  return WideToUTF8(wide);
32}
33
34std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
35  return SysUTF8ToWide(native_mb);
36}
37
38#else
39
// Converts |wide| to the multi-byte encoding of the current locale by feeding
// it to wcrtomb() one wide character at a time.
//
// Embedded L'\0' characters are kept and come out as single NUL bytes.  If
// wcrtomb() reports an error for any character, the conversion is abandoned
// and an empty string is returned.  An empty input yields an empty string.
std::string SysWideToNativeMB(const std::wstring& wide) {
  mbstate_t ps;
  memset(&ps, 0, sizeof(ps));  // Start in the initial conversion state.

  std::string out;
  // Each wide character contributes at least one byte below, so the input
  // length is a lower bound for the output length.
  out.reserve(wide.size());

  // wcrtomb() cannot be asked for a length without an output buffer, so
  // convert into a scratch buffer and append.  MB_LEN_MAX is the largest
  // number of bytes any supported locale may need for a single character.
  char buf[MB_LEN_MAX];

  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];
    // Skip NULs to avoid wcrtomb's special handling of them; they are
    // emitted as a plain zero byte instead.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;

    if (res == static_cast<size_t>(-1)) {
      // The character cannot be converted: give up on the whole string.
      return std::string();
    }

    if (res == 0) {
      // Embedded NUL (or a conversion that produced no bytes): the output
      // still gets exactly one zero byte for this character.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
99
100std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
101  mbstate_t ps;
102
103  // Calculate the number of wide characters.  We walk through the string
104  // without writing the output, counting the number of wide characters.
105  size_t num_out_chars = 0;
106  memset(&ps, 0, sizeof(ps));
107  for (size_t i = 0; i < native_mb.size(); ) {
108    const char* src = native_mb.data() + i;
109    size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
110    switch (res) {
111      // Handle any errors and return an empty string.
112      case static_cast<size_t>(-2):
113      case static_cast<size_t>(-1):
114        return std::wstring();
115        break;
116      case 0:
117        // We hit an embedded null byte, keep going.
118        i += 1;  // Fall through.
119      default:
120        i += res;
121        ++num_out_chars;
122        break;
123    }
124  }
125
126  if (num_out_chars == 0)
127    return std::wstring();
128
129  std::wstring out;
130  out.resize(num_out_chars);
131
132  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
133  // We walk the input string again, with |i| tracking the index of the
134  // multi-byte input, and |j| tracking the wide output.
135  for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
136    const char* src = native_mb.data() + i;
137    wchar_t* dst = &out[j];
138    size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
139    switch (res) {
140      // Handle any errors and return an empty string.
141      case static_cast<size_t>(-2):
142      case static_cast<size_t>(-1):
143        return std::wstring();
144        break;
145      case 0:
146        i += 1;  // Skip null byte.
147        break;
148      default:
149        i += res;
150        break;
151    }
152  }
153
154  return out;
155}
156
157#endif  // OS_CHROMEOS
158
159}  // namespace base
160