net_string_util_icu.cc revision 5c02ac1a9c1b504631c0a3d2b6e737b5d738bae1
1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved.
2a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
3a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// found in the LICENSE file.
4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
5a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "net/base/net_string_util.h"
6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/i18n/i18n_constants.h"
8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/i18n/icu_string_conversions.h"
9a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/strings/string_util.h"
10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "third_party/icu/source/common/unicode/ucnv.h"
11a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace net {
13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
14a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool ConvertToUtf8(const std::string& text, const char* charset,
15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                   std::string* output) {
16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  output->clear();
17a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  UErrorCode err = U_ZERO_ERROR;
19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  UConverter* converter(ucnv_open(charset, &err));
20a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if (U_FAILURE(err))
21a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return false;
22a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
23a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // trailing '\0'.
27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  size_t output_length = text.length() * 3 + 1;
28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  char* buf = WriteInto(output, output_length);
29a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,
30a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                                     text.data(), text.length(), &err);
31a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  ucnv_close(converter);
32a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if (U_FAILURE(err)) {
33a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    output->clear();
34a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return false;
35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  output->resize(output_length);
38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return true;
39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool ConvertToUtf8AndNormalize(const std::string& text, const char* charset,
42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                               std::string* output) {
43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return base::ConvertToUtf8AndNormalize(text,  charset, output);
44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool ConvertLatin1ToUtf8AndNormalize(const std::string& text,
47                                    std::string* output) {
48  return net::ConvertToUtf8AndNormalize(text,  base::kCodepageLatin1, output);
49}
50
51bool ConvertToUTF16(const std::string& text, const char* charset,
52                    base::string16* output) {
53  return base::CodepageToUTF16(text, charset,
54                               base::OnStringConversionError::FAIL, output);
55}
56
57bool ConvertLatin1ToUTF16(const std::string& text, base::string16* output) {
58  return base::CodepageToUTF16(text, base::kCodepageLatin1,
59                               base::OnStringConversionError::FAIL, output);
60}
61
62}  // namespace net
63