// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
55c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#include "net/base/net_string_util.h"
65c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
75c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#include "base/i18n/i18n_constants.h"
85c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#include "base/i18n/icu_string_conversions.h"
95c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#include "base/strings/string_util.h"
105c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#include "third_party/icu/source/common/unicode/ucnv.h"
115c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
125c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liunamespace net {
135c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
14cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)const char* const kCharsetLatin1 = base::kCodepageLatin1;
15cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
165c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liubool ConvertToUtf8(const std::string& text, const char* charset,
175c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                   std::string* output) {
185c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  output->clear();
195c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
205c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  UErrorCode err = U_ZERO_ERROR;
215c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  UConverter* converter(ucnv_open(charset, &err));
225c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  if (U_FAILURE(err))
235c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu    return false;
245c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
255c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8.
265c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes
275c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a
285c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  // trailing '\0'.
295c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  size_t output_length = text.length() * 3 + 1;
305c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  char* buf = WriteInto(output, output_length);
315c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length,
325c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                                     text.data(), text.length(), &err);
335c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  ucnv_close(converter);
345c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  if (U_FAILURE(err)) {
355c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu    output->clear();
365c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu    return false;
375c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  }
385c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
395c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  output->resize(output_length);
405c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  return true;
415c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu}
425c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
435c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liubool ConvertToUtf8AndNormalize(const std::string& text, const char* charset,
445c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                               std::string* output) {
455c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  return base::ConvertToUtf8AndNormalize(text,  charset, output);
465c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu}
475c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
485c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liubool ConvertToUTF16(const std::string& text, const char* charset,
495c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                    base::string16* output) {
505c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  return base::CodepageToUTF16(text, charset,
515c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                               base::OnStringConversionError::FAIL, output);
525c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu}
535c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
54cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)bool ConvertToUTF16WithSubstitutions(const std::string& text,
55cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                                     const char* charset,
56cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                                     base::string16* output) {
57cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  return base::CodepageToUTF16(text, charset,
58cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                               base::OnStringConversionError::SUBSTITUTE,
59cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                               output);
605c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu}
615c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu
625c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu}  // namespace net
63