/* strdup16to8.c revision a26c4e049a8fd163ba5f25d0c43c8a3fee86453f */
/* libs/cutils/strdup16to8.c
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <limits.h>
#include <stdint.h>  /* for SIZE_MAX */
#include <stdlib.h>

#include <cutils/jstring.h>


/**
 * Return the number of bytes that one UTF-16 code unit occupies in the
 * UTF-8 output written by strncpy16to8().
 */
static size_t utf16UnitLen8(unsigned int uic)
{
    if (uic > 0x07ff) {
        return 3;
    }
    /* U+0000 counts as two bytes: it is encoded as "0xc0 0x80". */
    if (uic > 0x7f || uic == 0) {
        return 2;
    }
    return 1;
}

/**
 * Given a UTF-16 string, compute the length of the corresponding UTF-8
 * string in bytes.
 *
 * utf16Str: the UTF-16 code units to measure.
 * len:      the number of code units to read from utf16Str; embedded
 *           zero units do not stop the scan.
 *
 * Returns the byte count, not including any terminating '\0'. The
 * result is never larger than SIZE_MAX-1; that value is also what is
 * returned when the real length does not fit in a size_t.
 */
extern size_t strnlen16to8(const char16_t* utf16Str, size_t len)
{
    size_t utf8Len = 0;

    /* A small note on integer overflow. The result can
     * potentially be as big as 3*len, which will overflow
     * for len > SIZE_MAX/3.
     *
     * Moreover, the result of a strnlen16to8 is typically used
     * to allocate a destination buffer to strncpy16to8 which
     * requires one more byte to terminate the UTF-8 copy, and
     * this is generally done by careless users by incrementing
     * the result without checking for integer overflows, e.g.:
     *
     *   dst = malloc(strnlen16to8(utf16,len)+1)
     *
     * Due to this, the following code will try to detect
     * overflows, and never return more than (SIZE_MAX-1)
     * when it detects one. A careless user will try to malloc
     * SIZE_MAX bytes, which will return NULL which can at least
     * be detected appropriately.
     *
     * As far as I know, this function is only used by strndup16to8(),
     * but better be safe than sorry.
     */

    /* Fast path for the usual case where 3*len is < SIZE_MAX-1:
     * the sum cannot wrap, so no per-unit check is needed.
     */
    if (len < (SIZE_MAX-1)/3) {
        while (len--) {
            utf8Len += utf16UnitLen8(*utf16Str++);
        }
        return utf8Len;
    }

    /* The slower but paranoid version */
    while (len--) {
        size_t utf8Prev = utf8Len;

        utf8Len += utf16UnitLen8(*utf16Str++);

        /* Unsigned addition wrapped around: overflow detected. */
        if (utf8Len < utf8Prev) {
            return SIZE_MAX-1;
        }
    }

    /* don't return SIZE_MAX to avoid common user bug */
    if (utf8Len == SIZE_MAX) {
        utf8Len = SIZE_MAX-1;
    }

    return utf8Len;
}


/**
 * Convert a Java-Style UTF-16 string + length to a JNI-Style UTF-8 string.
 *
 * This basically means: embedded \0's in the UTF-16 string are encoded
 * as "0xc0 0x80"
 *
 * Make sure you allocate "utf8Str" with the result of strnlen16to8() + 1,
 * not just "len".
 *
 * Please note, a terminating \0 is always added, so your result will always
 * be "strnlen16to8() + 1" bytes long.
 *
 * utf8Str:  destination buffer; no bounds checking is done on it.
 * utf16Str: source UTF-16 code units.
 * len:      number of code units to convert; all of them are converted,
 *           including zero units.
 *
 * Returns utf8Str.
 */
extern char* strncpy16to8(char* utf8Str, const char16_t* utf16Str, size_t len)
{
    char* utf8cur = utf8Str;

    /* Note on overflows: We assume the user did check the result of
     * strnlen16to8() properly or at a minimum checked the result of
     * its malloc(SIZE_MAX) in case of overflow.
     */
    while (len--) {
        unsigned int uic = *utf16Str++;

        if (uic > 0x07ff) {
            /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
            *utf8cur++ = (char)((uic >> 12) | 0xe0);
            *utf8cur++ = (char)(((uic >> 6) & 0x3f) | 0x80);
            *utf8cur++ = (char)((uic & 0x3f) | 0x80);
        } else if (uic > 0x7f || uic == 0) {
            /* Two bytes: 110xxxxx 10xxxxxx. A zero code unit lands here
             * too and comes out as 0xc0 0x80, so it never terminates
             * the output early.
             */
            *utf8cur++ = (char)((uic >> 6) | 0xc0);
            *utf8cur++ = (char)((uic & 0x3f) | 0x80);
        } else {
            /* 0x01..0x7f: copied through as a single byte. */
            *utf8cur++ = (char)uic;
        }
    }

    *utf8cur = '\0';

    return utf8Str;
}

/**
 * Convert a UTF-16 string to UTF-8.
 *
 * s: the UTF-16 code units to convert; may be NULL.
 * n: the number of code units in s to convert.
 *
 * Returns a buffer obtained from malloc() holding the converted,
 * '\0'-terminated string, or NULL when s is NULL, when the converted
 * length overflows, or when the allocation fails.
 */
char * strndup16to8 (const char16_t* s, size_t n)
{
    char*   ret;
    size_t  len;

    if (s == NULL) {
        return NULL;
    }

    len = strnlen16to8(s, n);

    /* We are paranoid, and we check for SIZE_MAX-1
     * too since it is an overflow value for our
     * strnlen16to8 implementation.
     */
    if (len >= SIZE_MAX-1) {
        return NULL;
    }

    /* One extra byte for the terminator written by strncpy16to8(). */
    ret = malloc(len + 1);
    if (ret == NULL) {
        return NULL;
    }

    strncpy16to8 (ret, s, n);

    return ret;
}