1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file. 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/sys_string_conversions.h" 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <wchar.h> 8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_piece.h" 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/utf_string_conversions.h" 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace base { 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string SysWideToUTF8(const std::wstring& wide) { 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // In theory this should be using the system-provided conversion rather 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // than our ICU, but this will do for now. 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return WideToUTF8(wide); 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring SysUTF8ToWide(const StringPiece& utf8) { 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // In theory this should be using the system-provided conversion rather 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // than our ICU, but this will do for now. 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::wstring out; 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UTF8ToWide(utf8.data(), utf8.size(), &out); 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return out; 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#if defined(OS_CHROMEOS) 28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// ChromeOS always runs in UTF-8 locale. 30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::string SysWideToNativeMB(const std::wstring& wide) { 31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return WideToUTF8(wide); 32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstd::wstring SysNativeMBToWide(const StringPiece& native_mb) { 35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return SysUTF8ToWide(native_mb); 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#else 39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string SysWideToNativeMB(const std::wstring& wide) { 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott mbstate_t ps; 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Calculate the number of multi-byte characters. We walk through the string 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // without writing the output, counting the number of multi-byte characters. 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t num_out_chars = 0; 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott memset(&ps, 0, sizeof(ps)); 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0; i < wide.size(); ++i) { 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const wchar_t src = wide[i]; 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Use a temp buffer since calling wcrtomb with an output of NULL does not 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // calculate the output length. 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott char buf[16]; 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Skip NULLs to avoid wcrtomb's special handling of them. 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t res = src ? wcrtomb(buf, src, &ps) : 0; 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott switch (res) { 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle any errors and return an empty string. 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case static_cast<size_t>(-1): 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return std::string(); 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case 0: 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We hit an embedded null byte, keep going. 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ++num_out_chars; 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott default: 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott num_out_chars += res; 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (num_out_chars == 0) 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return std::string(); 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string out; 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott out.resize(num_out_chars); 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We walk the input string again, with |i| tracking the index of the 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // wide input, and |j| tracking the multi-byte output. 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott memset(&ps, 0, sizeof(ps)); 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0, j = 0; i < wide.size(); ++i) { 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const wchar_t src = wide[i]; 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We don't want wcrtomb to do it's funkiness for embedded NULLs. 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t res = src ? wcrtomb(&out[j], src, &ps) : 0; 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott switch (res) { 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle any errors and return an empty string. 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case static_cast<size_t>(-1): 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return std::string(); 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case 0: 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We hit an embedded null byte, keep going. 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ++j; // Output is already zeroed. 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott default: 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott j += res; 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return out; 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::wstring SysNativeMBToWide(const StringPiece& native_mb) { 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott mbstate_t ps; 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Calculate the number of wide characters. We walk through the string 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // without writing the output, counting the number of wide characters. 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t num_out_chars = 0; 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott memset(&ps, 0, sizeof(ps)); 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0; i < native_mb.size(); ) { 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* src = native_mb.data() + i; 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott switch (res) { 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle any errors and return an empty string. 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case static_cast<size_t>(-2): 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case static_cast<size_t>(-1): 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return std::wstring(); 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case 0: 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We hit an embedded null byte, keep going. 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott i += 1; // Fall through. 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott default: 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott i += res; 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ++num_out_chars; 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (num_out_chars == 0) 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return std::wstring(); 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::wstring out; 130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott out.resize(num_out_chars); 131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott memset(&ps, 0, sizeof(ps)); // Clear the shift state. 133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We walk the input string again, with |i| tracking the index of the 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // multi-byte input, and |j| tracking the wide output. 135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* src = native_mb.data() + i; 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott wchar_t* dst = &out[j]; 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott switch (res) { 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Handle any errors and return an empty string. 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case static_cast<size_t>(-2): 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case static_cast<size_t>(-1): 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return std::wstring(); 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott case 0: 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott i += 1; // Skip null byte. 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott default: 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott i += res; 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return out; 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif // OS_CHROMEOS 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace base 160