/*
*******************************************************************************
*
*   Copyright (C) 2002-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  punycode.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002jan31
*   created by: Markus W. Scherer
*/


/* This ICU code derived from: */
/*
punycode.c 0.4.0 (2001-Nov-17-Sat)
http://www.cs.berkeley.edu/~amc/idn/
Adam M. Costello
http://www.nicemice.net/amc/

Disclaimer and license

    Regarding this entire document or any portion of it (including
    the pseudocode and C code), the author makes no guarantees and
    is not responsible for any damage resulting from its use.  The
    author grants irrevocable permission to anyone to use, modify,
    and distribute it in any way that does not diminish the rights
    of anyone else to use, modify, and distribute it, provided that
    redistributed derivative works do not contain misleading author or
    version information.  Derivative works need not be licensed under
    similar terms.
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ICU modifications: 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - ICU data types and coding conventions 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - ICU string buffer handling with implicit source lengths 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and destination preflighting 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - UTF-16 handling 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf.h" 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ustr_imp.h" 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "punycode.h" 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Punycode ----------------------------------------------------------------- */ 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
/* Bootstring parameter set that defines Punycode */
#define BASE            36
#define TMIN            1
#define TMAX            26
#define SKEW            38
#define DAMP            700
#define INITIAL_BIAS    72
#define INITIAL_N       0x80

/* ASCII ("basic") code points used by the encoder and decoder */
#define _HYPHEN         0x2d
#define DELIMITER       _HYPHEN     /* separates the basic prefix from the encoded deltas */

#define _ZERO_          0x30
#define _NINE           0x39

#define _SMALL_A        0x61
#define _SMALL_Z        0x7a

#define _CAPITAL_A      0x41
#define _CAPITAL_Z      0x5a

/* A code point is "basic" when it is below 0x80. */
#define IS_BASIC(c) ((c)<0x80)
/* True for A..Z only; note that (c) is evaluated twice. */
#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z)

866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * digitToBasic() returns the basic code point whose value 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (when used for representing integers) is d, which must be in the 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * nonzero, in which case the uppercase form is used. 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline char 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdigitToBasic(int32_t digit, UBool uppercase) { 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0..25 map to ASCII a..z or A..Z */ 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 26..35 map to ASCII 0..9 */ 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(digit<26) { 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(uppercase) { 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (char)(_CAPITAL_A+digit); 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (char)(_SMALL_A+digit); 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (char)((_ZERO_-26)+digit); 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * basicToDigit[] contains the numeric value of a basic code 
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * point (for use in representing integers) in the range 0 to 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * BASE-1, or -1 if b is does not represent a value. 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const int8_t 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbasicToDigit[256]={ 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline char 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgasciiCaseMap(char b, UBool uppercase) { 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(uppercase) { 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(_SMALL_A<=b && b<=_SMALL_Z) { 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b-=(_SMALL_A-_CAPITAL_A); 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(_CAPITAL_A<=b && b<=_CAPITAL_Z) { 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b+=(_SMALL_A-_CAPITAL_A); 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return b; 
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Punycode-specific Bootstring code ---------------------------------------- */ 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The following code omits the {parts} of the pseudo-algorithm in the spec 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that are not used with the Punycode parameter set. 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Bias adaptation function. */ 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgadaptBias(int32_t delta, int32_t length, UBool firstTime) { 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t count; 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstTime) { 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delta/=DAMP; 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delta/=2; 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delta+=delta/length; 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(count=0; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delta/=(BASE-TMIN); 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAX_CP_COUNT 200 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strToPunycode(const UChar *src, int32_t srcLength, 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UBool *caseFlags, 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t cpBuffer[MAX_CP_COUNT]; 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c, c2; 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* argument checking */ 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Handle the basic code points and 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcCPCount=destLength=0; 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcLength==-1) { 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* NUL-terminated input */ 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(j=0; /* no condition */; ++j) { 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((c=src[j])==0) { 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcCPCount==MAX_CP_COUNT) { 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* too many input code points */ 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(IS_BASIC(c)) { 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cpBuffer[srcCPCount++]=0; 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destLength<destCapacity) { 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[destLength]= 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org caseFlags!=NULL ? 
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org asciiCaseMap((char)c, caseFlags[j]) : 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (char)c; 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++destLength; 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n=(caseFlags!=NULL && caseFlags[j])<<31L; 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SINGLE(c)) { 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n|=c; 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) { 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++j; 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2); 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* error: unmatched surrogate */ 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_CHAR_FOUND; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cpBuffer[srcCPCount++]=n; 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* length-specified input */ 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(j=0; j<srcLength; ++j) { 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcCPCount==MAX_CP_COUNT) { 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* too many input code points */ 
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=src[j]; 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(IS_BASIC(c)) { 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cpBuffer[srcCPCount++]=0; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destLength<destCapacity) { 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[destLength]= 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org caseFlags!=NULL ? 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org asciiCaseMap((char)c, caseFlags[j]) : 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (char)c; 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++destLength; 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n=(caseFlags!=NULL && caseFlags[j])<<31L; 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SINGLE(c)) { 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n|=c; 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) { 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++j; 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2); 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* error: unmatched surrogate */ 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_CHAR_FOUND; 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cpBuffer[srcCPCount++]=n; 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Finish the basic string - if it is not empty - with a delimiter. */ 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org basicLength=destLength; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(basicLength>0) { 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destLength<destCapacity) { 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[destLength]=DELIMITER; 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++destLength; 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * handledCPCount is the number of code points that have been handled 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * basicLength is the number of basic code points 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * destLength is the number of chars that have been output 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Initialize the state: */ 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n=INITIAL_N; 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delta=0; 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
bias=INITIAL_BIAS; 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Main encoding loop: */ 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) { 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * All non-basic code points < n have been handled already. 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Find the next larger one: 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(m=0x7fffffff, j=0; j<srcCPCount; ++j) { 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */ 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(n<=q && q<m) { 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org m=q; 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Increase delta enough to advance the decoder's 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <n,i> state to <m,0>, but guard against overflow: 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
delta+=(m-n)*(handledCPCount+1); 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n=m; 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Encode a sequence of same code points n */ 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(j=0; j<srcCPCount; ++j) { 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */ 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(q<n) { 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++delta; 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(q==n) { 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Represent delta as a generalized variable-length integer: */ 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(q=delta, k=BASE; /* no condition */; k+=BASE) { 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=k-bias; 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(t<TMIN) { 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMIN; 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(t>TMAX) { 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMAX; 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=k-bias; 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(t<TMIN) { 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMIN; 
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(k>=(bias+TMAX)) { 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMAX; 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(q<t) { 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destLength<destCapacity) { 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0); 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++destLength; 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=(q-t)/(BASE-t); 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destLength<destCapacity) { 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0)); 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++destLength; 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength)); 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delta=0; 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++handledCPCount; 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++delta; 
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++n; 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_strFromPunycode(const UChar *src, int32_t srcLength, 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool *caseFlags, 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t, 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destCPCount, firstSupplementaryIndex, cpLength; 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar b; 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* argument checking */ 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcLength==-1) { 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcLength=u_strlen(src); 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Handle the basic code points: 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Let basicLength be the number of input code points 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * before the last delimiter, or 0 if there is none, 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then copy the first basicLength code points to the output. 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The two following loops iterate backward. 
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(j=srcLength; j>0;) { 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src[--j]==DELIMITER) { 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLength=basicLength=destCPCount=j; 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(destLength>=0); 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(j>0) { 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=src[--j]; 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!IS_BASIC(b)) { 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_CHAR_FOUND; 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(j<destCapacity) { 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[j]=(UChar)b; 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(caseFlags!=NULL) { 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org caseFlags[j]=IS_BASIC_UPPERCASE(b); 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Initialize the state: */ 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
n=INITIAL_N; 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i=0; 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bias=INITIAL_BIAS; 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstSupplementaryIndex=1000000000; 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Main decoding loop: 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Start just after the last delimiter if any 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * basic code points were copied; start at the beginning otherwise. 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) { 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in is the index of the next character to be consumed, and 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * destCPCount is the number of code points in the output array. 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Decode a generalized variable-length integer into delta, 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * which gets added to i. The overflow checking is easier 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if we increase i as we go, then subtract off its starting 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * value at the end to obtain delta. 
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) { 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(in>=srcLength) { 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org digit=basicToDigit[(uint8_t)src[in++]]; 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(digit<0) { 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_CHAR_FOUND; 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(digit>(0x7fffffff-i)/w) { 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* integer overflow */ 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i+=digit*w; 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=k-bias; 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(t<TMIN) { 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMIN; 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(t>TMAX) { 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
t=TMAX; 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=k-bias; 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(t<TMIN) { 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMIN; 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(k>=(bias+TMAX)) { 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org t=TMAX; 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(digit<t) { 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(w>0x7fffffff/(BASE-t)) { 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* integer overflow */ 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org w*=BASE-t; 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Modification from sample code: 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Increments destCPCount here, 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * where needed instead of in for() loop tail. 
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++destCPCount; 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0)); 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * i was supposed to wrap around from (incremented) destCPCount to 0, 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * incrementing n each time, so we'll fix that now: 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i/destCPCount>(0x7fffffff-n)) { 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* integer overflow */ 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n+=i/destCPCount; 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i%=destCPCount; 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not needed for Punycode: */ 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(n>0x10ffff || U_IS_SURROGATE(n)) { 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Unicode code point overflow */ 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Insert n at position i of the output: */ 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cpLength=U16_LENGTH(n); 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) { 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t codeUnitIndex; 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Handle indexes when supplementary code points are present. 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In almost all cases, there will be only BMP code points before i 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and even in the entire string. 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is handled with the same efficiency as with UTF-32. 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Only the rare cases with supplementary code points are handled 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * more slowly - but not too bad since this is an insertion anyway. 
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=firstSupplementaryIndex) { 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codeUnitIndex=i; 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cpLength>1) { 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstSupplementaryIndex=codeUnitIndex; 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++firstSupplementaryIndex; 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codeUnitIndex=firstSupplementaryIndex; 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex); 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use the UChar index codeUnitIndex instead of the code point index i */ 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(codeUnitIndex<destLength) { 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memmove(dest+codeUnitIndex+cpLength, 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest+codeUnitIndex, 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (destLength-codeUnitIndex)*U_SIZEOF_UCHAR); 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(caseFlags!=NULL) { 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memmove(caseFlags+codeUnitIndex+cpLength, 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org caseFlags+codeUnitIndex, 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLength-codeUnitIndex); 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cpLength==1) { 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP, insert one code unit */ 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[codeUnitIndex]=(UChar)n; 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* supplementary character, insert two code units */ 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[codeUnitIndex]=U16_LEAD(n); 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[codeUnitIndex+1]=U16_TRAIL(n); 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(caseFlags!=NULL) { 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Case of last character determines uppercase flag: */ 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]); 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cpLength==2) { 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org caseFlags[codeUnitIndex+1]=FALSE; 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLength+=cpLength; 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(destLength>=0); 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */ 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_IDNA */ 588