// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30#include <string.h> 31 32#include "common/convert_UTF.h" 33#include "common/scoped_ptr.h" 34#include "common/string_conversion.h" 35#include "common/using_std_string.h" 36 37namespace google_breakpad { 38 39using std::vector; 40 41void UTF8ToUTF16(const char *in, vector<uint16_t> *out) { 42 size_t source_length = strlen(in); 43 const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); 44 const UTF8 *source_end_ptr = source_ptr + source_length; 45 // Erase the contents and zero fill to the expected size 46 out->clear(); 47 out->insert(out->begin(), source_length, 0); 48 uint16_t *target_ptr = &(*out)[0]; 49 uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); 50 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 51 &target_ptr, target_end_ptr, 52 strictConversion); 53 54 // Resize to be the size of the # of converted characters + NULL 55 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 56} 57 58int UTF8ToUTF16Char(const char *in, int in_length, uint16_t out[2]) { 59 const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); 60 const UTF8 *source_end_ptr = source_ptr + sizeof(char); 61 uint16_t *target_ptr = out; 62 uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); 63 out[0] = out[1] = 0; 64 65 // Process one character at a time 66 while (1) { 67 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 68 &target_ptr, target_end_ptr, 69 strictConversion); 70 71 if (result == conversionOK) 72 return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in)); 73 74 // Add another character to the input stream and try again 75 source_ptr = reinterpret_cast<const UTF8 *>(in); 76 ++source_end_ptr; 77 78 if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length) 79 break; 80 } 81 82 return 0; 83} 84 85void UTF32ToUTF16(const wchar_t *in, vector<uint16_t> *out) { 86 size_t source_length = wcslen(in); 87 const UTF32 *source_ptr = reinterpret_cast<const 
UTF32 *>(in); 88 const UTF32 *source_end_ptr = source_ptr + source_length; 89 // Erase the contents and zero fill to the expected size 90 out->clear(); 91 out->insert(out->begin(), source_length, 0); 92 uint16_t *target_ptr = &(*out)[0]; 93 uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); 94 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 95 &target_ptr, target_end_ptr, 96 strictConversion); 97 98 // Resize to be the size of the # of converted characters + NULL 99 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 100} 101 102void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) { 103 const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in); 104 const UTF32 *source_end_ptr = source_ptr + 1; 105 uint16_t *target_ptr = out; 106 uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); 107 out[0] = out[1] = 0; 108 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 109 &target_ptr, target_end_ptr, 110 strictConversion); 111 112 if (result != conversionOK) { 113 out[0] = out[1] = 0; 114 } 115} 116 117static inline uint16_t Swap(uint16_t value) { 118 return (value >> 8) | static_cast<uint16_t>(value << 8); 119} 120 121string UTF16ToUTF8(const vector<uint16_t> &in, bool swap) { 122 const UTF16 *source_ptr = &in[0]; 123 scoped_array<uint16_t> source_buffer; 124 125 // If we're to swap, we need to make a local copy and swap each byte pair 126 if (swap) { 127 int idx = 0; 128 source_buffer.reset(new uint16_t[in.size()]); 129 UTF16 *source_buffer_ptr = source_buffer.get(); 130 for (vector<uint16_t>::const_iterator it = in.begin(); 131 it != in.end(); ++it, ++idx) 132 source_buffer_ptr[idx] = Swap(*it); 133 134 source_ptr = source_buffer.get(); 135 } 136 137 // The maximum expansion would be 4x the size of the input string. 
138 const UTF16 *source_end_ptr = source_ptr + in.size(); 139 size_t target_capacity = in.size() * 4; 140 scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); 141 UTF8 *target_ptr = target_buffer.get(); 142 UTF8 *target_end_ptr = target_ptr + target_capacity; 143 ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, 144 &target_ptr, target_end_ptr, 145 strictConversion); 146 147 if (result == conversionOK) { 148 const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get()); 149 return targetPtr; 150 } 151 152 return ""; 153} 154 155} // namespace google_breakpad 156