/*
 ******************************************************************************
 *
 *   Copyright (C) 2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 */
/*
punycode.c from draft-ietf-idn-punycode-03
http://www.nicemice.net/idn/
Adam M. Costello
http://www.nicemice.net/amc/

This is ANSI C code (C89) implementing
Punycode (draft-ietf-idn-punycode-03).

Disclaimer and license

    Regarding this entire document or any portion of it (including
    the pseudocode and C code), the author makes no guarantees and
    is not responsible for any damage resulting from its use.  The
    author grants irrevocable permission to anyone to use, modify,
    and distribute it in any way that does not diminish the rights
    of anyone else to use, modify, and distribute it, provided that
    redistributed derivative works do not contain misleading author or
    version information.  Derivative works need not be licensed under
    similar terms.

*/
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef _PUNYREF_H
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define _PUNYREF_H
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************************************************/
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Public interface (would normally go in its own .h file): */
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Status codes returned by punycode_encode() and punycode_decode().
 * The enumerators are implicitly numbered, so punycode_success is 0
 * and every failure code is nonzero.
 */
enum punycode_status {
  punycode_success,     /* Conversion succeeded.                   */
  punycode_bad_input,   /* Input is invalid.                       */
  punycode_big_output,  /* Output would exceed the space provided. */
  punycode_overflow     /* Input needs wider integers to process.  */
};
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Unsigned 32-bit integer type used by the Punycode interface for
 * code points and for input/output lengths.
 */
typedef uint32_t punycode_uint;
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum punycode_status  punycode_encode(
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  punycode_uint input_length,
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  const punycode_uint input[],
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  const unsigned char case_flags[],
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  punycode_uint *output_length,
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  char output[] );
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* punycode_encode() converts Unicode to Punycode.  The input     */
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* is represented as an array of Unicode code points (not code    */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* units; surrogate pairs are not allowed), and the output        */
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* will be represented as an array of ASCII code points.  The     */
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output string is *not* null-terminated; it will contain        */
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* zeros if and only if the input contains zeros.  (Of course     */
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* the caller can leave room for a terminator and add one if      */
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* needed.)  The input_length is the number of code points in     */
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* the input.  The output_length is an in/out argument: the       */
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* caller passes in the maximum number of code points that it     */
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* can receive, and on successful return it will contain the      */
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* number of code points actually output.  The case_flags array   */
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* holds input_length boolean values, where nonzero suggests that */
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* the corresponding Unicode character be forced to uppercase     */
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* after being decoded (if possible), and zero suggests that      */
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* it be forced to lowercase (if possible).  ASCII code points    */
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* are encoded literally, except that ASCII letters are forced    */
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* to uppercase or lowercase according to the corresponding       */
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* uppercase flags.  If case_flags is a null pointer then ASCII   */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* letters are left as they are, and other code points are        */
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* treated as if their uppercase flags were zero.  The return     */
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* value can be any of the punycode_status values defined above   */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* except punycode_bad_input; if not punycode_success, then       */
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output_size and output might contain garbage.                  */
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum punycode_status punycode_decode(
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  punycode_uint input_length,
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  const char input[],
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  punycode_uint *output_length,
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  punycode_uint output[],
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  unsigned char case_flags[] );
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* punycode_decode() converts Punycode to Unicode.  The input is  */
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* represented as an array of ASCII code points, and the output   */
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* will be represented as an array of Unicode code points.  The   */
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* input_length is the number of code points in the input.  The   */
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output_length is an in/out argument: the caller passes in      */
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* the maximum number of code points that it can receive, and     */
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* on successful return it will contain the actual number of      */
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* code points output.  The case_flags array needs room for at    */
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* least output_length values, or it can be a null pointer if the */
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* case information is not needed.  A nonzero flag suggests that  */
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* the corresponding Unicode character be forced to uppercase     */
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* by the caller (if possible), while zero suggests that it be    */
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* forced to lowercase (if possible).  ASCII code points are      */
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output already in the proper case, but their flags will be set */
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* appropriately so that applying the flags would be harmless.    */
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* The return value can be any of the punycode_status values      */
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* defined above; if not punycode_success, then output_length,    */
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output, and case_flags might contain garbage.  On success, the */
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* decoder will never need to write an output_length greater than */
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* input_length, because of how the encoding is defined.          */
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
117