1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   Copyright (C) 2003-2005, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   file name:  punyref.h
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   encoding:   US-ASCII
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   tab size:   8 (not used)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   indentation:4
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   created on: 2003feb1
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   created by: Ram Viswanadha
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Disclaimer and license
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Regarding this entire document or any portion of it (including
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    the pseudocode and C code), the author makes no guarantees and
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    is not responsible for any damage resulting from its use.  The
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    author grants irrevocable permission to anyone to use, modify,
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    and distribute it in any way that does not diminish the rights
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    of anyone else to use, modify, and distribute it, provided that
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    redistributed derivative works do not contain misleading author or
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    version information.  Derivative works need not be licensed under
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    similar terms.
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)punycode.c 0.4.0 (2001-Nov-17-Sat)
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)http://www.cs.berkeley.edu/~amc/idn/
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Adam M. Costello
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)http://www.nicemice.net/amc/
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**********************************************************/
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Implementation (would normally go in its own .c file): */
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h>
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_IDNA
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "punyref.h"
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*** Bootstring parameters for Punycode ***/
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum { base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700,
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)       initial_bias = 72, initial_n = 0x80, delimiter = 0x2D };
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* basic(cp) tests whether cp is a basic code point: */
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define basic(cp) ((punycode_uint)(cp) < 0x80)
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* delim(cp) tests whether cp is a delimiter: */
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define delim(cp) ((cp) == delimiter)
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* decode_digit(cp) returns the numeric value of a basic code */
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* point (for use in representing integers) in the range 0 to */
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* base-1, or base if cp is does not represent a value.       */
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static punycode_uint decode_digit(punycode_uint cp)
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return  cp - 48 < 10 ? cp - 22 :  cp - 65 < 26 ? cp - 65 :
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          cp - 97 < 26 ? cp - 97 :  base;
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* encode_digit(d,flag) returns the basic code point whose value      */
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* (when used for representing integers) is d, which needs to be in   */
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* the range 0 to base-1.  The lowercase form is used unless flag is  */
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* nonzero, in which case the uppercase form is used.  The behavior   */
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* is undefined if flag is nonzero and digit d has no uppercase form. */
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static char encode_digit(punycode_uint d, int flag)
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return (char) d + 22 + 75 * (d < 26) - ((flag != 0) << 5);
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /*  0..25 map to ASCII a..z or A..Z */
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* 26..35 map to ASCII 0..9         */
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* flagged(bcp) tests whether a basic code point is flagged */
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* (uppercase).  The behavior is undefined if bcp is not a  */
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* basic code point.                                        */
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* encode_basic(bcp,flag) forces a basic code point to lowercase */
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* if flag is zero, uppercase if flag is nonzero, and returns    */
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* the resulting code point.  The code point is unchanged if it  */
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* is caseless.  The behavior is undefined if bcp is not a basic */
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* code point.                                                   */
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static char encode_basic(punycode_uint bcp, int flag)
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  bcp -= (bcp - 97 < 26) << 5;
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return (char) bcp + ((!flag && (bcp - 65 < 26)) << 5);
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*** Platform-specific constants ***/
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* maxint is the maximum value of a punycode_uint variable: */
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const punycode_uint maxint = (punycode_uint) (-1);
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Because maxint is unsigned, -1 becomes the maximum value. */
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*** Bias adaptation function ***/
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static punycode_uint adapt(
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint delta, punycode_uint numpoints, int firsttime )
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint k;
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  delta = firsttime ? delta / damp : delta >> 1;
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* delta >> 1 is a faster way of doing delta / 2 */
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  delta += delta / numpoints;
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  for (k = 0;  delta > ((base - tmin) * tmax) / 2;  k += base) {
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delta /= base - tmin;
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return k + (base - tmin + 1) * delta / (delta + skew);
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*** Main encode function ***/
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum punycode_status punycode_encode(
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint input_length,
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  const punycode_uint input[],
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  const unsigned char case_flags[],
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint *output_length,
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  char output[] )
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint n, delta, h, b, out, max_out, bias, j, m, q, k, t;
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* Initialize the state: */
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  n = initial_n;
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  delta = out = 0;
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  max_out = *output_length;
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  bias = initial_bias;
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* Handle the basic code points: */
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  for (j = 0;  j < input_length;  ++j) {
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (basic(input[j])) {
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (max_out - out < 2) return punycode_big_output;
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      output[out++] = (char)
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (case_flags ?  encode_basic(input[j], case_flags[j]) : input[j]);
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* else if (input[j] < n) return punycode_bad_input; */
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* (not needed for Punycode with unsigned code points) */
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  h = b = out;
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* h is the number of code points that have been handled, b is the  */
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* number of basic code points, and out is the number of characters */
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* that have been output.                                           */
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if (b > 0) output[out++] = delimiter;
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* Main encoding loop: */
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  while (h < input_length) {
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* All non-basic code points < n have been     */
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* handled already.  Find the next larger one: */
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (m = maxint, j = 0;  j < input_length;  ++j) {
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      /* if (basic(input[j])) continue; */
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      /* (not needed for Punycode) */
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (input[j] >= n && input[j] < m) m = input[j];
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Increase delta enough to advance the decoder's    */
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* <n,i> state to <m,0>, but guard against overflow: */
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow;
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delta += (m - n) * (h + 1);
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    n = m;
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (j = 0;  j < input_length;  ++j) {
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      /* Punycode does not need to check whether input[j] is basic: */
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (input[j] < n /* || basic(input[j]) */ ) {
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (++delta == 0) return punycode_overflow;
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      }
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (input[j] == n) {
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* Represent delta as a generalized variable-length integer: */
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (q = delta, k = base;  ;  k += base) {
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          if (out >= max_out) return punycode_big_output;
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          t = k <= bias /* + tmin */ ? tmin :     /* +tmin not needed */
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              k >= bias + tmax ? tmax : k - bias;
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          if (q < t) break;
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          output[out++] = encode_digit(t + (q - t) % (base - t), 0);
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          q = (q - t) / (base - t);
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        output[out++] = encode_digit(q, case_flags && case_flags[j]);
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bias = adapt(delta, h + 1, h == b);
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delta = 0;
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++h;
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      }
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ++delta, ++n;
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  *output_length = out;
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return punycode_success;
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*** Main decode function ***/
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum punycode_status punycode_decode(
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint input_length,
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  const char input[],
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint *output_length,
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint output[],
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  unsigned char case_flags[] )
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  punycode_uint n, out, i, max_out, bias,
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 b, j, in, oldi, w, k, digit, t;
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* Initialize the state: */
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  n = initial_n;
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  out = i = 0;
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  max_out = *output_length;
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  bias = initial_bias;
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* Handle the basic code points:  Let b be the number of input code */
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* points before the last delimiter, or 0 if there is none, then    */
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* copy the first b code points to the output.                      */
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  for (b = j = 0;  j < input_length;  ++j) if (delim(input[j])) b = j;
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if (b > max_out) return punycode_big_output;
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  for (j = 0;  j < b;  ++j) {
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (case_flags) case_flags[out] = flagged(input[j]);
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!basic(input[j])) return punycode_bad_input;
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    output[out++] = input[j];
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* Main decoding loop:  Start just after the last delimiter if any  */
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  /* basic code points were copied; start at the beginning otherwise. */
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  for (in = b > 0 ? b + 1 : 0;  in < input_length;  ++out) {
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* in is the index of the next character to be consumed, and */
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* out is the number of code points in the output array.     */
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Decode a generalized variable-length integer into delta,  */
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* which gets added to i.  The overflow checking is easier   */
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* if we increase i as we go, then subtract off its starting */
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* value at the end to obtain delta.                         */
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (oldi = i, w = 1, k = base;  ;  k += base) {
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (in >= input_length) return punycode_bad_input;
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      digit = decode_digit(input[in++]);
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (digit >= base) return punycode_bad_input;
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (digit > (maxint - i) / w) return punycode_overflow;
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      i += digit * w;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      t = k <= bias /* + tmin */ ? tmin :     /* +tmin not needed */
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          k >= bias + tmax ? tmax : k - bias;
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (digit < t) break;
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (w > maxint / (base - t)) return punycode_overflow;
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      w *= (base - t);
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bias = adapt(i - oldi, out + 1, oldi == 0);
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* i was supposed to wrap around from out+1 to 0,   */
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* incrementing n each time, so we'll fix that now: */
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (i / (out + 1) > maxint - n) return punycode_overflow;
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    n += i / (out + 1);
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    i %= (out + 1);
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Insert n at position i of the output: */
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* not needed for Punycode: */
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* if (decode_digit(n) <= base) return punycode_invalid_input; */
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (out >= max_out) return punycode_big_output;
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (case_flags) {
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      memmove(case_flags + i + 1, case_flags + i, out - i);
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      /* Case of last character determines uppercase flag: */
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      case_flags[i] = flagged(input[in - 1]);
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    memmove(output + i + 1, output + i, (out - i) * sizeof *output);
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    output[i++] = n;
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  *output_length = out;
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return punycode_success;
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_IDNA */
303