1/*
2*******************************************************************************
3*   Copyright (C) 2010, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  idna.h
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010mar05
12*   created by: Markus W. Scherer
13*/
14
15#ifndef __IDNA_H__
16#define __IDNA_H__
17
18/**
19 * \file
20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
21 */
22
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_IDNA
26
27#include "unicode/bytestream.h"
28#include "unicode/stringpiece.h"
29#include "unicode/uidna.h"
30#include "unicode/unistr.h"
31
32U_NAMESPACE_BEGIN
33
// Forward declaration: IDNAInfo is defined further down in this header.
// It is needed here because the IDNA methods take an IDNAInfo by reference.
class U_COMMON_API IDNAInfo;
35
36/**
37 * Abstract base class for IDNA processing.
38 * See http://www.unicode.org/reports/tr46/
39 * and http://www.ietf.org/rfc/rfc3490.txt
40 *
41 * The IDNA class is not intended for public subclassing.
42 *
43 * This C++ API currently only implements UTS #46.
44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
45 * and IDNA2003 (functions that do not use a service object).
46 * @draft ICU 4.6
47 */
class U_COMMON_API IDNA : public UObject {
public:
    /**
     * Returns an IDNA instance which implements UTS #46.
     * Returns an unmodifiable instance, owned by the caller.
     * Cache it for multiple operations, and delete it when done.
     * The instance is thread-safe, that is, it can be used concurrently.
     *
     * UTS #46 defines Unicode IDNA Compatibility Processing,
     * updated to the latest version of Unicode and compatible with both
     * IDNA2003 and IDNA2008.
     *
     * The worker functions use transitional processing, including deviation mappings,
     * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
     * is used in which case the deviation characters are passed through without change.
     *
     * Disallowed characters are mapped to U+FFFD.
     *
     * For available options see the uidna.h header.
     * Operations with the UTS #46 instance do not support the
     * UIDNA_ALLOW_UNASSIGNED option.
     *
     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
     * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
     *
     * @param options Bit set to modify the processing and error checking.
     *                See option bit set values in uidna.h.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return the UTS #46 IDNA instance, if successful
     * @draft ICU 4.6
     */
    static IDNA *
    createUTS46Instance(uint32_t options, UErrorCode &errorCode);

    /**
     * Converts a single domain name label into its ASCII form for DNS lookup.
     * If any processing step fails, then info.hasErrors() will be TRUE and
     * the result might not be an ASCII string.
     * The label might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * Pure virtual: the implementation is supplied by a subclass.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @draft ICU 4.6
     */
    virtual UnicodeString &
    labelToASCII(const UnicodeString &label, UnicodeString &dest,
                 IDNAInfo &info, UErrorCode &errorCode) const = 0;

    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * If any processing step fails, then info.hasErrors() will be TRUE.
     * The label might be modified according to the types of errors.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * Pure virtual: the implementation is supplied by a subclass.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @draft ICU 4.6
     */
    virtual UnicodeString &
    labelToUnicode(const UnicodeString &label, UnicodeString &dest,
                   IDNAInfo &info, UErrorCode &errorCode) const = 0;

    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * If any processing step fails, then info.hasErrors() will be TRUE and
     * the result might not be an ASCII string.
     * The domain name might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * Pure virtual: the implementation is supplied by a subclass.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @draft ICU 4.6
     */
    virtual UnicodeString &
    nameToASCII(const UnicodeString &name, UnicodeString &dest,
                IDNAInfo &info, UErrorCode &errorCode) const = 0;

    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * If any processing step fails, then info.hasErrors() will be TRUE.
     * The domain name might be modified according to the types of errors.
     *
     * The UErrorCode indicates an error only in exceptional cases,
     * such as a U_MEMORY_ALLOCATION_ERROR.
     *
     * Pure virtual: the implementation is supplied by a subclass.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @return dest
     * @draft ICU 4.6
     */
    virtual UnicodeString &
    nameToUnicode(const UnicodeString &name, UnicodeString &dest,
                  IDNAInfo &info, UErrorCode &errorCode) const = 0;

    // UTF-8 versions of the processing methods ---------------------------- ***
    // Unlike the UnicodeString versions above, these are virtual but not pure
    // virtual, and they return void: the result is delivered through the
    // ByteSink argument rather than through a return value.

    /**
     * Converts a single domain name label into its ASCII form for DNS lookup.
     * UTF-8 version of labelToASCII(), same behavior.
     * The result is written to dest; this function has no return value.
     *
     * @param label Input domain name label
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @draft ICU 4.6
     */
    virtual void
    labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
                      IDNAInfo &info, UErrorCode &errorCode) const;

    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * UTF-8 version of labelToUnicode(), same behavior.
     * The result is written to dest; this function has no return value.
     *
     * @param label Input domain name label
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @draft ICU 4.6
     */
    virtual void
    labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
                       IDNAInfo &info, UErrorCode &errorCode) const;

    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * UTF-8 version of nameToASCII(), same behavior.
     * The result is written to dest; this function has no return value.
     *
     * @param name Input domain name
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @draft ICU 4.6
     */
    virtual void
    nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
                     IDNAInfo &info, UErrorCode &errorCode) const;

    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * UTF-8 version of nameToUnicode(), same behavior.
     * The result is written to dest; this function has no return value.
     *
     * @param name Input domain name
     * @param dest Destination byte sink; Flush()ed if successful
     * @param info Output container of IDNA processing details.
     * @param errorCode Standard ICU error code. Its input value must
     *                  pass the U_SUCCESS() test, or else the function returns
     *                  immediately. Check for U_FAILURE() on output or use with
     *                  function chaining. (See User Guide for details.)
     * @draft ICU 4.6
     */
    virtual void
    nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
                      IDNAInfo &info, UErrorCode &errorCode) const;

private:
    // No ICU "poor man's RTTI" for this class nor its subclasses.
    // Declared private so that it is not part of the public interface of IDNA.
    virtual UClassID getDynamicClassID() const;
};
256
// Forward declaration only; UTS46 is not defined in this header.
// IDNAInfo names it as a friend class.
class UTS46;
258
259/**
260 * Output container for IDNA processing errors.
261 * The IDNAInfo class is not suitable for subclassing.
262 * @draft ICU 4.6
263 */
264class U_COMMON_API IDNAInfo : public UMemory {
265public:
266    /**
267     * Constructor for stack allocation.
268     * @draft ICU 4.6
269     */
270    IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
271    /**
272     * Were there IDNA processing errors?
273     * @return TRUE if there were processing errors
274     * @draft ICU 4.6
275     */
276    UBool hasErrors() const { return errors!=0; }
277    /**
278     * Returns a bit set indicating IDNA processing errors.
279     * See UIDNA_ERROR_... constants in uidna.h.
280     * @return bit set of processing errors
281     * @draft ICU 4.6
282     */
283    uint32_t getErrors() const { return errors; }
284    /**
285     * Returns TRUE if transitional and nontransitional processing produce different results.
286     * This is the case when the input label or domain name contains
287     * one or more deviation characters outside a Punycode label (see UTS #46).
288     * <ul>
289     * <li>With nontransitional processing, such characters are
290     * copied to the destination string.
291     * <li>With transitional processing, such characters are
292     * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
293     * </ul>
294     * @return TRUE if transitional and nontransitional processing produce different results
295     * @draft ICU 4.6
296     */
297    UBool isTransitionalDifferent() const { return isTransDiff; }
298
299private:
300    friend class UTS46;
301
302    IDNAInfo(const IDNAInfo &other);  // no copying
303    IDNAInfo &operator=(const IDNAInfo &other);  // no copying
304
305    void reset() {
306        errors=labelErrors=0;
307        isTransDiff=FALSE;
308        isBiDi=FALSE;
309        isOkBiDi=TRUE;
310    }
311
312    uint32_t errors, labelErrors;
313    UBool isTransDiff;
314    UBool isBiDi;
315    UBool isOkBiDi;
316};
317
318U_NAMESPACE_END
319
320#endif  // UCONFIG_NO_IDNA
321#endif  // __IDNA_H__
322