1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 2003, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ****************************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupunycode.c from draft-ietf-idn-punycode-03 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruhttp://www.nicemice.net/idn/ 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAdam M. Costello 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruhttp://www.nicemice.net/amc/ 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruThis is ANSI C code (C89) implementing 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruPunycode (draft-ietf-idn-punycode-03). 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruDisclaimer and license 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Regarding this entire document or any portion of it (including 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru the pseudocode and C code), the author makes no guarantees and 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru is not responsible for any damage resulting from its use. The 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru author grants irrevocable permission to anyone to use, modify, 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru and distribute it in any way that does not diminish the rights 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru of anyone else to use, modify, and distribute it, provided that 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru redistributed derivative works do not contain misleading author or 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru version information. Derivative works need not be licensed under 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru similar terms. 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef _PUNYREF_H 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define _PUNYREF_H 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************************************************/ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Public interface (would normally go in its own .h file): */ 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum punycode_status { 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_success, 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_bad_input, /* Input is invalid. */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_big_output, /* Output would exceed the space provided. */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_overflow /* Input needs wider integers to process. */ 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef uint32_t punycode_uint; 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum punycode_status punycode_encode( 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_uint input_length, 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const punycode_uint input[], 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const unsigned char case_flags[], 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_uint *output_length, 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char output[] ); 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* punycode_encode() converts Unicode to Punycode. The input */ 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is represented as an array of Unicode code points (not code */ 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* units; surrogate pairs are not allowed), and the output */ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will be represented as an array of ASCII code points. The */ 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output string is *not* null-terminated; it will contain */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* zeros if and only if the input contains zeros. (Of course */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the caller can leave room for a terminator and add one if */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* needed.) The input_length is the number of code points in */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the input. The output_length is an in/out argument: the */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* caller passes in the maximum number of code points that it */ 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* can receive, and on successful return it will contain the */ 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* number of code points actually output. The case_flags array */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* holds input_length boolean values, where nonzero suggests that */ 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the corresponding Unicode character be forced to uppercase */ 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* after being decoded (if possible), and zero suggests that */ 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* it be forced to lowercase (if possible). ASCII code points */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* are encoded literally, except that ASCII letters are forced */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* to uppercase or lowercase according to the corresponding */ 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* uppercase flags. If case_flags is a null pointer then ASCII */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* letters are left as they are, and other code points are */ 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* treated as if their uppercase flags were zero. The return */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* value can be any of the punycode_status values defined above */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* except punycode_bad_input; if not punycode_success, then */ 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output_size and output might contain garbage. */ 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum punycode_status punycode_decode( 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_uint input_length, 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char input[], 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_uint *output_length, 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru punycode_uint output[], 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char case_flags[] ); 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* punycode_decode() converts Punycode to Unicode. The input is */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* represented as an array of ASCII code points, and the output */ 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* will be represented as an array of Unicode code points. The */ 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* input_length is the number of code points in the input. The */ 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output_length is an in/out argument: the caller passes in */ 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the maximum number of code points that it can receive, and */ 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* on successful return it will contain the actual number of */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* code points output. The case_flags array needs room for at */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* least output_length values, or it can be a null pointer if the */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case information is not needed. A nonzero flag suggests that */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the corresponding Unicode character be forced to uppercase */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* by the caller (if possible), while zero suggests that it be */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* forced to lowercase (if possible). ASCII code points are */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output already in the proper case, but their flags will be set */ 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* appropriately so that applying the flags would be harmless. */ 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* The return value can be any of the punycode_status values */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* defined above; if not punycode_success, then output_length, */ 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output, and case_flags might contain garbage. On success, the */ 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* decoder will never need to write an output_length greater than */ 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* input_length, because of how the encoding is defined. */ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 117