utf_offset_string_conversions.cc revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright (c) 2009 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/utf_offset_string_conversions.h" 6 7#include "base/string_piece.h" 8#include "base/utf_string_conversion_utils.h" 9 10using base::PrepareForUTF16Or32Output; 11using base::ReadUnicodeCharacter; 12using base::WriteUnicodeCharacter; 13 14// Generalized Unicode converter ----------------------------------------------- 15 16// Converts the given source Unicode character type to the given destination 17// Unicode character type as a STL string. The given input buffer and size 18// determine the source, and the given output STL string will be replaced by 19// the result. 20template<typename SRC_CHAR> 21bool ConvertUnicode(const SRC_CHAR* src, 22 size_t src_len, 23 std::wstring* output, 24 size_t* offset_for_adjustment) { 25 size_t output_offset = 26 (offset_for_adjustment && *offset_for_adjustment < src_len) ? 27 *offset_for_adjustment : std::wstring::npos; 28 29 // ICU requires 32-bit numbers. 30 bool success = true; 31 int32 src_len32 = static_cast<int32>(src_len); 32 for (int32 i = 0; i < src_len32; i++) { 33 uint32 code_point; 34 size_t original_i = i; 35 size_t chars_written = 0; 36 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 37 chars_written = WriteUnicodeCharacter(code_point, output); 38 } else { 39 chars_written = WriteUnicodeCharacter(0xFFFD, output); 40 success = false; 41 } 42 if ((output_offset != std::wstring::npos) && 43 (*offset_for_adjustment > original_i)) { 44 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last 45 // character read, not after it (so that incrementing it in the loop 46 // increment will place it at the right location), so we need to account 47 // for that in determining the amount that was read. 48 if (*offset_for_adjustment <= static_cast<size_t>(i)) 49 output_offset = std::wstring::npos; 50 else 51 output_offset += chars_written - (i - original_i + 1); 52 } 53 } 54 55 if (offset_for_adjustment) 56 *offset_for_adjustment = output_offset; 57 return success; 58} 59 60// UTF-8 <-> Wide -------------------------------------------------------------- 61 62bool UTF8ToWideAndAdjustOffset(const char* src, 63 size_t src_len, 64 std::wstring* output, 65 size_t* offset_for_adjustment) { 66 PrepareForUTF16Or32Output(src, src_len, output); 67 return ConvertUnicode(src, src_len, output, offset_for_adjustment); 68} 69 70std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, 71 size_t* offset_for_adjustment) { 72 std::wstring ret; 73 UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret, 74 offset_for_adjustment); 75 return ret; 76} 77 78// UTF-16 <-> Wide ------------------------------------------------------------- 79 80#if defined(WCHAR_T_IS_UTF16) 81 82// When wide == UTF-16, then conversions are a NOP. 83bool UTF16ToWideAndAdjustOffset(const char16* src, 84 size_t src_len, 85 std::wstring* output, 86 size_t* offset_for_adjustment) { 87 output->assign(src, src_len); 88 if (offset_for_adjustment && (*offset_for_adjustment >= src_len)) 89 *offset_for_adjustment = std::wstring::npos; 90 return true; 91} 92 93std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, 94 size_t* offset_for_adjustment) { 95 if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length())) 96 *offset_for_adjustment = std::wstring::npos; 97 return utf16; 98} 99 100#elif defined(WCHAR_T_IS_UTF32) 101 102bool UTF16ToWideAndAdjustOffset(const char16* src, 103 size_t src_len, 104 std::wstring* output, 105 size_t* offset_for_adjustment) { 106 output->clear(); 107 // Assume that normally we won't have any non-BMP characters so the counts 108 // will be the same. 109 output->reserve(src_len); 110 return ConvertUnicode(src, src_len, output, offset_for_adjustment); 111} 112 113std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, 114 size_t* offset_for_adjustment) { 115 std::wstring ret; 116 UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret, 117 offset_for_adjustment); 118 return ret; 119} 120 121#endif // defined(WCHAR_T_IS_UTF32) 122