1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/sys_string_conversions.h" 6 7#include <wchar.h> 8 9#include "base/string_piece.h" 10#include "base/string_util.h" 11 12namespace base { 13 14std::string SysWideToUTF8(const std::wstring& wide) { 15 // In theory this should be using the system-provided conversion rather 16 // than our ICU, but this will do for now. 17 return WideToUTF8(wide); 18} 19std::wstring SysUTF8ToWide(const StringPiece& utf8) { 20 // In theory this should be using the system-provided conversion rather 21 // than our ICU, but this will do for now. 22 std::wstring out; 23 UTF8ToWide(utf8.data(), utf8.size(), &out); 24 return out; 25} 26 27std::string SysWideToNativeMB(const std::wstring& wide) { 28 mbstate_t ps; 29 30 // Calculate the number of multi-byte characters. We walk through the string 31 // without writing the output, counting the number of multi-byte characters. 32 size_t num_out_chars = 0; 33 memset(&ps, 0, sizeof(ps)); 34 for (size_t i = 0; i < wide.size(); ++i) { 35 const wchar_t src = wide[i]; 36 // Use a temp buffer since calling wcrtomb with an output of NULL does not 37 // calculate the output length. 38 char buf[16]; 39 // Skip NULLs to avoid wcrtomb's special handling of them. 40 size_t res = src ? wcrtomb(buf, src, &ps) : 0; 41 switch (res) { 42 // Handle any errors and return an empty string. 43 case static_cast<size_t>(-1): 44 return std::string(); 45 break; 46 case 0: 47 // We hit an embedded null byte, keep going. 48 ++num_out_chars; 49 break; 50 default: 51 num_out_chars += res; 52 break; 53 } 54 } 55 56 if (num_out_chars == 0) 57 return std::string(); 58 59 std::string out; 60 out.resize(num_out_chars); 61 62 // We walk the input string again, with |i| tracking the index of the 63 // wide input, and |j| tracking the multi-byte output. 64 memset(&ps, 0, sizeof(ps)); 65 for (size_t i = 0, j = 0; i < wide.size(); ++i) { 66 const wchar_t src = wide[i]; 67 // We don't want wcrtomb to do it's funkiness for embedded NULLs. 68 size_t res = src ? wcrtomb(&out[j], src, &ps) : 0; 69 switch (res) { 70 // Handle any errors and return an empty string. 71 case static_cast<size_t>(-1): 72 return std::string(); 73 break; 74 case 0: 75 // We hit an embedded null byte, keep going. 76 ++j; // Output is already zeroed. 77 break; 78 default: 79 j += res; 80 break; 81 } 82 } 83 84 return out; 85} 86 87std::wstring SysNativeMBToWide(const StringPiece& native_mb) { 88 mbstate_t ps; 89 90 // Calculate the number of wide characters. We walk through the string 91 // without writing the output, counting the number of wide characters. 92 size_t num_out_chars = 0; 93 memset(&ps, 0, sizeof(ps)); 94 for (size_t i = 0; i < native_mb.size(); ) { 95 const char* src = native_mb.data() + i; 96 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); 97 switch (res) { 98 // Handle any errors and return an empty string. 99 case static_cast<size_t>(-2): 100 case static_cast<size_t>(-1): 101 return std::wstring(); 102 break; 103 case 0: 104 // We hit an embedded null byte, keep going. 105 i += 1; // Fall through. 106 default: 107 i += res; 108 ++num_out_chars; 109 break; 110 } 111 } 112 113 if (num_out_chars == 0) 114 return std::wstring(); 115 116 std::wstring out; 117 out.resize(num_out_chars); 118 119 memset(&ps, 0, sizeof(ps)); // Clear the shift state. 120 // We walk the input string again, with |i| tracking the index of the 121 // multi-byte input, and |j| tracking the wide output. 122 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { 123 const char* src = native_mb.data() + i; 124 wchar_t* dst = &out[j]; 125 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); 126 switch (res) { 127 // Handle any errors and return an empty string. 128 case static_cast<size_t>(-2): 129 case static_cast<size_t>(-1): 130 return std::wstring(); 131 break; 132 case 0: 133 i += 1; // Skip null byte. 134 break; 135 default: 136 i += res; 137 break; 138 } 139 } 140 141 return out; 142} 143 144} // namespace base 145