sys_string_conversions_linux.cc revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/sys_string_conversions.h"
6
7#include <wchar.h>
8
9#include "base/string_piece.h"
10#include "base/string_util.h"
11
12namespace base {
13
14std::string SysWideToUTF8(const std::wstring& wide) {
15  // In theory this should be using the system-provided conversion rather
16  // than our ICU, but this will do for now.
17  return WideToUTF8(wide);
18}
19std::wstring SysUTF8ToWide(const StringPiece& utf8) {
20  // In theory this should be using the system-provided conversion rather
21  // than our ICU, but this will do for now.
22  std::wstring out;
23  UTF8ToWide(utf8.data(), utf8.size(), &out);
24  return out;
25}
26
// Converts |wide| to a multi-byte string by feeding it to wcrtomb() one
// character at a time. Embedded NUL wide characters are carried through as
// single NUL bytes. Returns an empty string if wcrtomb() reports an error for
// any character.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);
  const wchar_t* const first = wide.data();
  const wchar_t* const last = first + wide.size();

  // Pass 1: measure the output. Every character is converted into a scratch
  // buffer purely to learn how many bytes it needs, because calling wcrtomb()
  // with a NULL output does not yield the length.
  mbstate_t state = mbstate_t();
  size_t byte_count = 0;
  for (const wchar_t* cur = first; cur != last; ++cur) {
    // NUL never reaches wcrtomb(), which treats it specially; it simply
    // occupies one output byte.
    if (*cur == L'\0') {
      ++byte_count;
      continue;
    }
    char scratch[16];
    const size_t written = wcrtomb(scratch, *cur, &state);
    if (written == kConversionError)
      return std::string();
    // A zero-length result is accounted for as a single byte.
    byte_count += (written == 0) ? 1 : written;
  }

  if (byte_count == 0)
    return std::string();

  // Pass 2: convert for real, straight into the pre-sized (and therefore
  // already zero-filled) output buffer.
  std::string out(byte_count, '\0');
  state = mbstate_t();  // Start again from the initial conversion state.
  size_t pos = 0;
  for (const wchar_t* cur = first; cur != last; ++cur) {
    if (*cur == L'\0') {
      ++pos;  // The byte at |pos| is already NUL.
      continue;
    }
    const size_t written = wcrtomb(&out[pos], *cur, &state);
    if (written == kConversionError)
      return std::string();
    pos += (written == 0) ? 1 : written;
  }

  return out;
}
86
87std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
88  mbstate_t ps;
89
90  // Calculate the number of wide characters.  We walk through the string
91  // without writing the output, counting the number of wide characters.
92  size_t num_out_chars = 0;
93  memset(&ps, 0, sizeof(ps));
94  for (size_t i = 0; i < native_mb.size(); ) {
95    const char* src = native_mb.data() + i;
96    size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
97    switch (res) {
98      // Handle any errors and return an empty string.
99      case static_cast<size_t>(-2):
100      case static_cast<size_t>(-1):
101        return std::wstring();
102        break;
103      case 0:
104        // We hit an embedded null byte, keep going.
105        i += 1;  // Fall through.
106      default:
107        i += res;
108        ++num_out_chars;
109        break;
110    }
111  }
112
113  if (num_out_chars == 0)
114    return std::wstring();
115
116  std::wstring out;
117  out.resize(num_out_chars);
118
119  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
120  // We walk the input string again, with |i| tracking the index of the
121  // multi-byte input, and |j| tracking the wide output.
122  for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
123    const char* src = native_mb.data() + i;
124    wchar_t* dst = &out[j];
125    size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
126    switch (res) {
127      // Handle any errors and return an empty string.
128      case static_cast<size_t>(-2):
129      case static_cast<size_t>(-1):
130        return std::wstring();
131        break;
132      case 0:
133        i += 1;  // Skip null byte.
134        break;
135      default:
136        i += res;
137        break;
138    }
139  }
140
141  return out;
142}
143
144}  // namespace base
145