/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  nptrans.h
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */

17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef NPTRANS_H
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define NPTRANS_H
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ures.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/translit.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "intltest.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Code point of the ASCII space character (U+0020). */
#define ASCII_SPACE 0x0020

34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass NamePrepTransform {
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate :
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator *mapping;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet unassigned;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet prohibited;
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet labelSeparatorSet;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UResourceBundle *bundle;
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NamePrepTransform(UParseError& parseError, UErrorCode& status);
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic :
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~NamePrepTransform();
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    inline UBool isProhibited(UChar32 ch);
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for the actual class.
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    inline UClassID getDynamicClassID() const { return getStaticClassID(); }
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ICU "poor man's RTTI", returns a UClassID for this class.
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Map every character in input stream with mapping character
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * in the mapping table and populate the output stream.
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * For any individual character the mapping table may specify
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * that that a character be mapped to nothing, mapped to one
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * other character or to a string of other characters.
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param src           Pointer to UChar buffer containing a single label
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param srcLength     Number of characters in the source label
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param dest          Pointer to the destination buffer to receive the output
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param destCapacity  The capacity of destination array
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                          If TRUE unassigned values are treated as normal Unicode code point.
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param status        ICU error code in/out parameter.
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                      Must fulfill U_SUCCESS before the function call.
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return The number of UChars in the destination buffer
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t map(const UChar* src, int32_t srcLength,
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UChar* dest, int32_t destCapacity,
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UBool allowUnassigned,
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UParseError* parseError,
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UErrorCode& status );
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * checks for prohited and BiDi characters in the order defined by RFC 3454
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param src           Pointer to UChar buffer containing a single label
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param srcLength     Number of characters in the source label
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param dest          Pointer to the destination buffer to receive the output
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param destCapacity  The capacity of destination array
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                          If TRUE unassigned values are treated as normal Unicode code point.
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param status        ICU error code in/out parameter.
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                      Must fulfill U_SUCCESS before the function call.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return The number of UChars in the destination buffer
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t process(const UChar* src, int32_t srcLength,
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UChar* dest, int32_t destCapacity,
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UBool allowUnassigned,
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UParseError* parseError,
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode& status );
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Ascertain if the given code point is a label separator as specified by IDNA
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return TRUE is the code point is a label separator
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    inline UBool isLDHChar(UChar32 ch);
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate:
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The address of this static class variable serves as this class's ID
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for ICU "poor man's RTTI".
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const char fgClassID;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline UBool NamePrepTransform::isLDHChar(UChar32 ch){
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // high runner case
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ch>0x007A){
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( (ch==0x002D) ||
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (0x0030 <= ch && ch <= 0x0039) ||
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (0x0041 <= ch && ch <= 0x005A) ||
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (0x0061 <= ch && ch <= 0x007A)
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      ){
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return FALSE;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass NamePrepTransform {
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */