/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  idna.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010mar05
*   created by: Markus W. Scherer
*/

15#ifndef __IDNA_H__
16#define __IDNA_H__
17
18/**
19 * \file
20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
21 */
22
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_IDNA
26
27#include "unicode/bytestream.h"
28#include "unicode/stringpiece.h"
29#include "unicode/uidna.h"
30#include "unicode/unistr.h"
31
32U_NAMESPACE_BEGIN
33
34class IDNAInfo;
35
36/**
37 * Abstract base class for IDNA processing.
38 * See http://www.unicode.org/reports/tr46/
39 * and http://www.ietf.org/rfc/rfc3490.txt
40 *
41 * The IDNA class is not intended for public subclassing.
42 *
43 * This C++ API currently only implements UTS #46.
44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
45 * and IDNA2003 (functions that do not use a service object).
46 * @stable ICU 4.6
47 */
48class U_COMMON_API IDNA : public UObject {
49public:
50    /**
51     * Destructor.
52     * @stable ICU 4.6
53     */
54    ~IDNA();
55
56    /**
57     * Returns an IDNA instance which implements UTS #46.
58     * Returns an unmodifiable instance, owned by the caller.
59     * Cache it for multiple operations, and delete it when done.
60     * The instance is thread-safe, that is, it can be used concurrently.
61     *
62     * UTS #46 defines Unicode IDNA Compatibility Processing,
63     * updated to the latest version of Unicode and compatible with both
64     * IDNA2003 and IDNA2008.
65     *
66     * The worker functions use transitional processing, including deviation mappings,
67     * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
68     * is used in which case the deviation characters are passed through without change.
69     *
70     * Disallowed characters are mapped to U+FFFD.
71     *
72     * For available options see the uidna.h header.
73     * Operations with the UTS #46 instance do not support the
74     * UIDNA_ALLOW_UNASSIGNED option.
75     *
76     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
77     * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
78     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
79     *
80     * @param options Bit set to modify the processing and error checking.
81     *                See option bit set values in uidna.h.
82     * @param errorCode Standard ICU error code. Its input value must
83     *                  pass the U_SUCCESS() test, or else the function returns
84     *                  immediately. Check for U_FAILURE() on output or use with
85     *                  function chaining. (See User Guide for details.)
86     * @return the UTS #46 IDNA instance, if successful
87     * @stable ICU 4.6
88     */
89    static IDNA *
90    createUTS46Instance(uint32_t options, UErrorCode &errorCode);
91
92    /**
93     * Converts a single domain name label into its ASCII form for DNS lookup.
94     * If any processing step fails, then info.hasErrors() will be TRUE and
95     * the result might not be an ASCII string.
96     * The label might be modified according to the types of errors.
97     * Labels with severe errors will be left in (or turned into) their Unicode form.
98     *
99     * The UErrorCode indicates an error only in exceptional cases,
100     * such as a U_MEMORY_ALLOCATION_ERROR.
101     *
102     * @param label Input domain name label
103     * @param dest Destination string object
104     * @param info Output container of IDNA processing details.
105     * @param errorCode Standard ICU error code. Its input value must
106     *                  pass the U_SUCCESS() test, or else the function returns
107     *                  immediately. Check for U_FAILURE() on output or use with
108     *                  function chaining. (See User Guide for details.)
109     * @return dest
110     * @stable ICU 4.6
111     */
112    virtual UnicodeString &
113    labelToASCII(const UnicodeString &label, UnicodeString &dest,
114                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
115
116    /**
117     * Converts a single domain name label into its Unicode form for human-readable display.
118     * If any processing step fails, then info.hasErrors() will be TRUE.
119     * The label might be modified according to the types of errors.
120     *
121     * The UErrorCode indicates an error only in exceptional cases,
122     * such as a U_MEMORY_ALLOCATION_ERROR.
123     *
124     * @param label Input domain name label
125     * @param dest Destination string object
126     * @param info Output container of IDNA processing details.
127     * @param errorCode Standard ICU error code. Its input value must
128     *                  pass the U_SUCCESS() test, or else the function returns
129     *                  immediately. Check for U_FAILURE() on output or use with
130     *                  function chaining. (See User Guide for details.)
131     * @return dest
132     * @stable ICU 4.6
133     */
134    virtual UnicodeString &
135    labelToUnicode(const UnicodeString &label, UnicodeString &dest,
136                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
137
138    /**
139     * Converts a whole domain name into its ASCII form for DNS lookup.
140     * If any processing step fails, then info.hasErrors() will be TRUE and
141     * the result might not be an ASCII string.
142     * The domain name might be modified according to the types of errors.
143     * Labels with severe errors will be left in (or turned into) their Unicode form.
144     *
145     * The UErrorCode indicates an error only in exceptional cases,
146     * such as a U_MEMORY_ALLOCATION_ERROR.
147     *
148     * @param name Input domain name
149     * @param dest Destination string object
150     * @param info Output container of IDNA processing details.
151     * @param errorCode Standard ICU error code. Its input value must
152     *                  pass the U_SUCCESS() test, or else the function returns
153     *                  immediately. Check for U_FAILURE() on output or use with
154     *                  function chaining. (See User Guide for details.)
155     * @return dest
156     * @stable ICU 4.6
157     */
158    virtual UnicodeString &
159    nameToASCII(const UnicodeString &name, UnicodeString &dest,
160                IDNAInfo &info, UErrorCode &errorCode) const = 0;
161
162    /**
163     * Converts a whole domain name into its Unicode form for human-readable display.
164     * If any processing step fails, then info.hasErrors() will be TRUE.
165     * The domain name might be modified according to the types of errors.
166     *
167     * The UErrorCode indicates an error only in exceptional cases,
168     * such as a U_MEMORY_ALLOCATION_ERROR.
169     *
170     * @param name Input domain name
171     * @param dest Destination string object
172     * @param info Output container of IDNA processing details.
173     * @param errorCode Standard ICU error code. Its input value must
174     *                  pass the U_SUCCESS() test, or else the function returns
175     *                  immediately. Check for U_FAILURE() on output or use with
176     *                  function chaining. (See User Guide for details.)
177     * @return dest
178     * @stable ICU 4.6
179     */
180    virtual UnicodeString &
181    nameToUnicode(const UnicodeString &name, UnicodeString &dest,
182                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
183
184    // UTF-8 versions of the processing methods ---------------------------- ***
185
186    /**
187     * Converts a single domain name label into its ASCII form for DNS lookup.
188     * UTF-8 version of labelToASCII(), same behavior.
189     *
190     * @param label Input domain name label
191     * @param dest Destination byte sink; Flush()ed if successful
192     * @param info Output container of IDNA processing details.
193     * @param errorCode Standard ICU error code. Its input value must
194     *                  pass the U_SUCCESS() test, or else the function returns
195     *                  immediately. Check for U_FAILURE() on output or use with
196     *                  function chaining. (See User Guide for details.)
197     * @return dest
198     * @stable ICU 4.6
199     */
200    virtual void
201    labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
202                      IDNAInfo &info, UErrorCode &errorCode) const;
203
204    /**
205     * Converts a single domain name label into its Unicode form for human-readable display.
206     * UTF-8 version of labelToUnicode(), same behavior.
207     *
208     * @param label Input domain name label
209     * @param dest Destination byte sink; Flush()ed if successful
210     * @param info Output container of IDNA processing details.
211     * @param errorCode Standard ICU error code. Its input value must
212     *                  pass the U_SUCCESS() test, or else the function returns
213     *                  immediately. Check for U_FAILURE() on output or use with
214     *                  function chaining. (See User Guide for details.)
215     * @return dest
216     * @stable ICU 4.6
217     */
218    virtual void
219    labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
220                       IDNAInfo &info, UErrorCode &errorCode) const;
221
222    /**
223     * Converts a whole domain name into its ASCII form for DNS lookup.
224     * UTF-8 version of nameToASCII(), same behavior.
225     *
226     * @param name Input domain name
227     * @param dest Destination byte sink; Flush()ed if successful
228     * @param info Output container of IDNA processing details.
229     * @param errorCode Standard ICU error code. Its input value must
230     *                  pass the U_SUCCESS() test, or else the function returns
231     *                  immediately. Check for U_FAILURE() on output or use with
232     *                  function chaining. (See User Guide for details.)
233     * @return dest
234     * @stable ICU 4.6
235     */
236    virtual void
237    nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
238                     IDNAInfo &info, UErrorCode &errorCode) const;
239
240    /**
241     * Converts a whole domain name into its Unicode form for human-readable display.
242     * UTF-8 version of nameToUnicode(), same behavior.
243     *
244     * @param name Input domain name
245     * @param dest Destination byte sink; Flush()ed if successful
246     * @param info Output container of IDNA processing details.
247     * @param errorCode Standard ICU error code. Its input value must
248     *                  pass the U_SUCCESS() test, or else the function returns
249     *                  immediately. Check for U_FAILURE() on output or use with
250     *                  function chaining. (See User Guide for details.)
251     * @return dest
252     * @stable ICU 4.6
253     */
254    virtual void
255    nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
256                      IDNAInfo &info, UErrorCode &errorCode) const;
257};
258
259class UTS46;
260
261/**
262 * Output container for IDNA processing errors.
263 * The IDNAInfo class is not suitable for subclassing.
264 * @stable ICU 4.6
265 */
266class U_COMMON_API IDNAInfo : public UMemory {
267public:
268    /**
269     * Constructor for stack allocation.
270     * @stable ICU 4.6
271     */
272    IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
273    /**
274     * Were there IDNA processing errors?
275     * @return TRUE if there were processing errors
276     * @stable ICU 4.6
277     */
278    UBool hasErrors() const { return errors!=0; }
279    /**
280     * Returns a bit set indicating IDNA processing errors.
281     * See UIDNA_ERROR_... constants in uidna.h.
282     * @return bit set of processing errors
283     * @stable ICU 4.6
284     */
285    uint32_t getErrors() const { return errors; }
286    /**
287     * Returns TRUE if transitional and nontransitional processing produce different results.
288     * This is the case when the input label or domain name contains
289     * one or more deviation characters outside a Punycode label (see UTS #46).
290     * <ul>
291     * <li>With nontransitional processing, such characters are
292     * copied to the destination string.
293     * <li>With transitional processing, such characters are
294     * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
295     * </ul>
296     * @return TRUE if transitional and nontransitional processing produce different results
297     * @stable ICU 4.6
298     */
299    UBool isTransitionalDifferent() const { return isTransDiff; }
300
301private:
302    friend class UTS46;
303
304    IDNAInfo(const IDNAInfo &other);  // no copying
305    IDNAInfo &operator=(const IDNAInfo &other);  // no copying
306
307    void reset() {
308        errors=labelErrors=0;
309        isTransDiff=FALSE;
310        isBiDi=FALSE;
311        isOkBiDi=TRUE;
312    }
313
314    uint32_t errors, labelErrors;
315    UBool isTransDiff;
316    UBool isBiDi;
317    UBool isOkBiDi;
318};
319
320U_NAMESPACE_END
321
322#endif  // UCONFIG_NO_IDNA
323#endif  // __IDNA_H__
324