strdup16to8.c revision a26c4e049a8fd163ba5f25d0c43c8a3fee86453f
1dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/* libs/cutils/strdup16to8.c 2dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** 3dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** Copyright 2006, The Android Open Source Project 4dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** 5dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** Licensed under the Apache License, Version 2.0 (the "License"); 6dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** you may not use this file except in compliance with the License. 7dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** You may obtain a copy of the License at 8dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** 9dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** http://www.apache.org/licenses/LICENSE-2.0 10dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** 11dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** Unless required by applicable law or agreed to in writing, software 12dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** distributed under the License is distributed on an "AS IS" BASIS, 13dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** See the License for the specific language governing permissions and 15dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project** limitations under the License. 16dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project*/ 17dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 18a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner#include <limits.h> /* for SIZE_MAX */ 19a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 20dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include <cutils/jstring.h> 21dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include <assert.h> 22dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project#include <stdlib.h> 23dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 24dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 25dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/** 26dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Given a UTF-16 string, compute the length of the corresponding UTF-8 27dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * string in bytes. 28dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project */ 29dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Projectextern size_t strnlen16to8(const char16_t* utf16Str, size_t len) 30dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project{ 31a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner size_t utf8Len = 0; 32a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 33a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner /* A small note on integer overflow. The result can 34a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * potentially be as big as 3*len, which will overflow 35a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * for len > SIZE_MAX/3. 36a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * 37a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * Moreover, the result of a strnlen16to8 is typically used 38a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * to allocate a destination buffer to strncpy16to8 which 39a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * requires one more byte to terminate the UTF-8 copy, and 40a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * this is generally done by careless users by incrementing 41a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * the result without checking for integer overflows, e.g.: 42a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * 43a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * dst = malloc(strnlen16to8(utf16,len)+1) 44a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * 45a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * Due to this, the following code will try to detect 46a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * overflows, and never return more than (SIZE_MAX-1) 47a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * when it detects one. A careless user will try to malloc 48a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * SIZE_MAX bytes, which will return NULL which can at least 49a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * be detected appropriately. 50a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * 51a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * As far as I know, this function is only used by strndup16(), 52a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * but better be safe than sorry. 53a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner */ 54a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 55a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner /* Fast path for the usual case where 3*len is < SIZE_MAX-1. 56a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner */ 57a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (len < (SIZE_MAX-1)/3) { 58a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner while (len--) { 59a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner unsigned int uic = *utf16Str++; 60a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 61a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (uic > 0x07ff) 62a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len += 3; 63a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner else if (uic > 0x7f || uic == 0) 64a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len += 2; 65a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner else 66a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len++; 67a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner } 68a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner return utf8Len; 69a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner } 70a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 71a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner /* The slower but paranoid version */ 72a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner while (len--) { 73a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner unsigned int uic = *utf16Str++; 74a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner size_t utf8Cur = utf8Len; 75a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 76a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (uic > 0x07ff) 77a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len += 3; 78a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner else if (uic > 0x7f || uic == 0) 79a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len += 2; 80a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner else 81a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len++; 82a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 83a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (utf8Len < utf8Cur) /* overflow detected */ 84a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner return SIZE_MAX-1; 85a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner } 86a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 87a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner /* don't return SIZE_MAX to avoid common user bug */ 88a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (utf8Len == SIZE_MAX) 89a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner utf8Len = SIZE_MAX-1; 90a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 91a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner return utf8Len; 92dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project} 93dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 94dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 95dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/** 96dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Convert a Java-Style UTF-16 string + length to a JNI-Style UTF-8 string. 97dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * 98dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * This basically means: embedded \0's in the UTF-16 string are encoded 99dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * as "0xc0 0x80" 100dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * 101dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Make sure you allocate "utf8Str" with the result of strlen16to8() + 1, 102dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * not just "len". 103a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * 104dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Please note, a terminated \0 is always added, so your result will always 105dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * be "strlen16to8() + 1" bytes long. 106dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project */ 107dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Projectextern char* strncpy16to8(char* utf8Str, const char16_t* utf16Str, size_t len) 108dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project{ 109dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project char* utf8cur = utf8Str; 110dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 111a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner /* Note on overflows: We assume the user did check the result of 112a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * strnlen16to8() properly or at a minimum checked the result of 113a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * its malloc(SIZE_MAX) in case of overflow. 114a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner */ 115dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project while (len--) { 116dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project unsigned int uic = *utf16Str++; 117dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 118dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (uic > 0x07ff) { 119dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur++ = (uic >> 12) | 0xe0; 120dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur++ = ((uic >> 6) & 0x3f) | 0x80; 121dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur++ = (uic & 0x3f) | 0x80; 122dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } else if (uic > 0x7f || uic == 0) { 123dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur++ = (uic >> 6) | 0xc0; 124dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur++ = (uic & 0x3f) | 0x80; 125dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } else { 126dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur++ = uic; 127dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 128dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (uic == 0) { 129dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project break; 130a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner } 131a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner } 132dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 133dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 134dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project *utf8cur = '\0'; 135dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 136dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return utf8Str; 137dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project} 138dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 139dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project/** 140dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * Convert a UTF-16 string to UTF-8. 141dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project * 142dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project */ 143dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Projectchar * strndup16to8 (const char16_t* s, size_t n) 144dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project{ 145a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner char* ret; 146a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner size_t len; 147dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 148dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project if (s == NULL) { 149dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project return NULL; 150dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project } 151dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 152a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner len = strnlen16to8(s, n); 153a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 154a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner /* We are paranoid, and we check for SIZE_MAX-1 155a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * too since it is an overflow value for our 156a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner * strnlen16to8 implementation. 157a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner */ 158a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (len >= SIZE_MAX-1) 159a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner return NULL; 160a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 161a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner ret = malloc(len + 1); 162a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner if (ret == NULL) 163a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner return NULL; 164dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project 165dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project strncpy16to8 (ret, s, n); 166a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner 167a26c4e049a8fd163ba5f25d0c43c8a3fee86453fDavid 'Digit' Turner return ret; 168dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0The Android Open Source Project} 169