net_string_util_icu.cc revision cedac228d2dd51db4b79ea1e72c7f249408ee061
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/base/net_string_util.h"
6
7#include "base/i18n/i18n_constants.h"
8#include "base/i18n/icu_string_conversions.h"
9#include "base/strings/string_util.h"
10#include "third_party/icu/source/common/unicode/ucnv.h"
11
12namespace net {
13
14const char* const kCharsetLatin1 = base::kCodepageLatin1;
15
16bool ConvertToUtf8(const std::string& text, const char* charset,
17                   std::string* output) {
18  output->clear();
19
20  UErrorCode err = U_ZERO_ERROR;
21  UConverter* converter(ucnv_open(charset, &err));
22  if (U_FAILURE(err))
23    return false;
24
25  // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
26  // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
27  // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
28  // trailing '\0'.
29  size_t output_length = text.length() * 3 + 1;
30  char* buf = WriteInto(output, output_length);
31  output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,
32                                     text.data(), text.length(), &err);
33  ucnv_close(converter);
34  if (U_FAILURE(err)) {
35    output->clear();
36    return false;
37  }
38
39  output->resize(output_length);
40  return true;
41}
42
43bool ConvertToUtf8AndNormalize(const std::string& text, const char* charset,
44                               std::string* output) {
45  return base::ConvertToUtf8AndNormalize(text,  charset, output);
46}
47
48bool ConvertToUTF16(const std::string& text, const char* charset,
49                    base::string16* output) {
50  return base::CodepageToUTF16(text, charset,
51                               base::OnStringConversionError::FAIL, output);
52}
53
54bool ConvertToUTF16WithSubstitutions(const std::string& text,
55                                     const char* charset,
56                                     base::string16* output) {
57  return base::CodepageToUTF16(text, charset,
58                               base::OnStringConversionError::SUBSTITUTE,
59                               output);
60}
61
62}  // namespace net
63