1/* crypto/asn1/a_utf8.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. 
If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 59#include <stdio.h> 60#include "cryptlib.h" 61#include <openssl/asn1.h> 62 63 64/* UTF8 utilities */ 65 66/* This parses a UTF8 string one character at a time. It is passed a pointer 67 * to the string and the length of the string. It sets 'value' to the value of 68 * the current character. It returns the number of characters read or a 69 * negative error code: 70 * -1 = string too short 71 * -2 = illegal character 72 * -3 = subsequent characters not of the form 10xxxxxx 73 * -4 = character encoded incorrectly (not minimal length). 
/*
 * UTF8_getc: decode the single UTF-8 sequence that starts at 'str'.
 *
 *   str  bytes to decode; read only when len > 0.
 *   len  number of bytes available at 'str'.
 *   val  receives the decoded value on success.  May be NULL, in which
 *        case the sequence is only validated and measured.
 *
 * Returns the number of bytes consumed (1 to 6), 0 when len <= 0, or a
 * negative error code:
 *   -1 = fewer than the required number of bytes are available
 *   -2 = the first byte is not a legal lead byte
 *   -3 = a following byte is not of the form 10xxxxxx
 *   -4 = the value was not encoded in the minimal number of bytes
 * '*val' is left untouched whenever the return value is <= 0.
 */
int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
{
    /* One entry per sequence length; entry n describes (n + 1)-byte forms. */
    static const struct {
        unsigned char lead_mask;    /* lead-byte bits that select the form */
        unsigned char lead_bits;    /* required value of those bits */
        unsigned long min_value;    /* smallest value needing this length */
    } forms[] = {
        { 0x80, 0x00, 0x0UL },
        { 0xe0, 0xc0, 0x80UL },
        { 0xf0, 0xe0, 0x800UL },
        { 0xf8, 0xf0, 0x10000UL },
        { 0xfc, 0xf8, 0x200000UL },
        { 0xfe, 0xfc, 0x4000000UL }
    };
    const int form_count = (int)(sizeof forms / sizeof forms[0]);
    unsigned long value;
    int form;
    int seq_len;
    int i;

    if (len <= 0)
        return 0;

    /* Classify the lead byte. */
    for (form = 0; form < form_count; form++) {
        if ((str[0] & forms[form].lead_mask) == forms[form].lead_bits)
            break;
    }
    if (form == form_count)
        return -2;
    seq_len = form + 1;

    if (len < seq_len)
        return -1;

    /* Every continuation byte is checked before any of them is used. */
    for (i = 1; i < seq_len; i++) {
        if ((str[i] & 0xc0) != 0x80)
            return -3;
    }

    /* Payload bits of the lead byte, then six bits per continuation byte. */
    value = (unsigned long)(str[0] & ~forms[form].lead_mask);
    for (i = 1; i < seq_len; i++)
        value = (value << 6) | (unsigned long)(str[i] & 0x3f);

    /* Reject over-long encodings. */
    if (value < forms[form].min_value)
        return -4;

    if (val != NULL)
        *val = value;
    return seq_len;
}

/*
 * UTF8_putc: encode 'value' as UTF-8.
 *
 *   str    output buffer of 'len' bytes, or NULL to only compute the
 *          encoded length ('len' is ignored in that case).
 *   len    capacity of 'str' in bytes.
 *   value  the character value to encode.
 *
 * Returns the number of bytes the encoding occupies (1 to 6), or -1 when
 * 'str' is non-NULL and 'len' is too small.  Nothing is written on failure.
 *
 * Six bytes carry 31 payload bits, so for a value above 0x7fffffff only
 * the low 31 bits are encoded and the higher bits are silently dropped.
 * NOTE(review): turning that case into an error return would change what
 * callers that add the result to a running length observe; audit them
 * before changing it.
 */
int UTF8_putc(unsigned char *str, int len, unsigned long value)
{
    /* Lead-byte marker indexed by sequence length (index 0 is unused). */
    static const unsigned char lead_marker[] = {
        0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
    };
    int need;
    int i;

    if (value < 0x80)
        need = 1;
    else if (value < 0x800)
        need = 2;
    else if (value < 0x10000)
        need = 3;
    else if (value < 0x200000)
        need = 4;
    else if (value < 0x4000000)
        need = 5;
    else
        need = 6;

    if (str == NULL)
        return need;
    if (len < need)     /* also covers len <= 0, since need >= 1 */
        return -1;

    if (need == 1) {
        str[0] = (unsigned char)value;
        return 1;
    }

    /* Fill continuation bytes from the last one backwards. */
    for (i = need - 1; i > 0; i--) {
        str[i] = (unsigned char)((value & 0x3f) | 0x80);
        value >>= 6;
    }
    /* What is left of 'value' goes into the lead byte's payload bits. */
    str[0] = (unsigned char)((value & (0x7fUL >> need)) | lead_marker[need]);
    return need;
}