net_string_util_icu.cc revision 5c02ac1a9c1b504631c0a3d2b6e737b5d738bae1
1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved. 2a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// found in the LICENSE file. 4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 5a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "net/base/net_string_util.h" 6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/i18n/i18n_constants.h" 8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h" 9a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/strings/string_util.h" 10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "third_party/icu/source/common/unicode/ucnv.h" 11a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace net { 13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 14a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool ConvertToUtf8(const std::string& text, const char* charset, 15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) std::string* output) { 16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) output->clear(); 17a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) UErrorCode err = U_ZERO_ERROR; 19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) UConverter* converter(ucnv_open(charset, &err)); 20a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if (U_FAILURE(err)) 21a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return false; 22a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 23a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. 24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes 25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a 26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // trailing '\0'. 27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) size_t output_length = text.length() * 3 + 1; 28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) char* buf = WriteInto(output, output_length); 29a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length, 30a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) text.data(), text.length(), &err); 31a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ucnv_close(converter); 32a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if (U_FAILURE(err)) { 33a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) output->clear(); 34a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return false; 35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) } 36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) output->resize(output_length); 38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return true; 39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool ConvertToUtf8AndNormalize(const std::string& text, const char* charset, 42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) std::string* output) { 43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return base::ConvertToUtf8AndNormalize(text, charset, output); 44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool ConvertLatin1ToUtf8AndNormalize(const std::string& text, 47 std::string* output) { 48 return net::ConvertToUtf8AndNormalize(text, base::kCodepageLatin1, output); 49} 50 51bool ConvertToUTF16(const std::string& text, const char* charset, 52 base::string16* output) { 53 return base::CodepageToUTF16(text, charset, 54 base::OnStringConversionError::FAIL, output); 55} 56 57bool ConvertLatin1ToUTF16(const std::string& text, base::string16* output) { 58 return base::CodepageToUTF16(text, base::kCodepageLatin1, 59 base::OnStringConversionError::FAIL, output); 60} 61 62} // namespace net 63