1/*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003-2006, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  nptrans.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003feb1
14 *   created by: Ram Viswanadha
15 */
16
17#ifndef NPTRANS_H
18#define NPTRANS_H
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_IDNA
23#if !UCONFIG_NO_TRANSLITERATION
24
25#include "unicode/uniset.h"
26#include "unicode/ures.h"
27#include "unicode/translit.h"
28
29#include "intltest.h"
30
31
/* Code point U+0020 (the ASCII space character), written as a hexadecimal constant. */
#define ASCII_SPACE 0x0020
33
34class NamePrepTransform {
35
36private :
37    Transliterator *mapping;
38    UnicodeSet unassigned;
39    UnicodeSet prohibited;
40    UnicodeSet labelSeparatorSet;
41    UResourceBundle *bundle;
42    NamePrepTransform(UParseError& parseError, UErrorCode& status);
43
44
45public :
46
47    static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
48
49    virtual ~NamePrepTransform();
50
51
52    inline UBool isProhibited(UChar32 ch);
53
54    /**
55     * ICU "poor man's RTTI", returns a UClassID for the actual class.
56     *
57     * @draft ICU 2.6
58     */
59    inline UClassID getDynamicClassID() const { return getStaticClassID(); }
60
61    /**
62     * ICU "poor man's RTTI", returns a UClassID for this class.
63     *
64     * @draft ICU 2.6
65     */
66    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
67
68    /**
69     * Map every character in input stream with mapping character
70     * in the mapping table and populate the output stream.
71     * For any individual character the mapping table may specify
72     * that that a character be mapped to nothing, mapped to one
73     * other character or to a string of other characters.
74     *
75     * @param src           Pointer to UChar buffer containing a single label
76     * @param srcLength     Number of characters in the source label
77     * @param dest          Pointer to the destination buffer to receive the output
78     * @param destCapacity  The capacity of destination array
79     * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
80     *                          If TRUE unassigned values are treated as normal Unicode code point.
81     *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
82     * @param status        ICU error code in/out parameter.
83     *                      Must fulfill U_SUCCESS before the function call.
84     * @return The number of UChars in the destination buffer
85     *
86     */
87    int32_t map(const UChar* src, int32_t srcLength,
88                        UChar* dest, int32_t destCapacity,
89                        UBool allowUnassigned,
90                        UParseError* parseError,
91                        UErrorCode& status );
92
93    /**
94     * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
95     * checks for prohited and BiDi characters in the order defined by RFC 3454
96     *
97     * @param src           Pointer to UChar buffer containing a single label
98     * @param srcLength     Number of characters in the source label
99     * @param dest          Pointer to the destination buffer to receive the output
100     * @param destCapacity  The capacity of destination array
101     * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
102     *                          If TRUE unassigned values are treated as normal Unicode code point.
103     *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
104     * @param status        ICU error code in/out parameter.
105     *                      Must fulfill U_SUCCESS before the function call.
106     * @return The number of UChars in the destination buffer
107     */
108    int32_t process(const UChar* src, int32_t srcLength,
109                            UChar* dest, int32_t destCapacity,
110                            UBool allowUnassigned,
111                            UParseError* parseError,
112                            UErrorCode& status );
113
114    /**
115     * Ascertain if the given code point is a label separator as specified by IDNA
116     *
117     * @return TRUE is the code point is a label separator
118     *
119     *
120     */
121    UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
122
123
124    inline UBool isLDHChar(UChar32 ch);
125private:
126    /**
127     * The address of this static class variable serves as this class's ID
128     * for ICU "poor man's RTTI".
129     */
130    static const char fgClassID;
131};
132
133inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
134    // high runner case
135    if(ch>0x007A){
136        return FALSE;
137    }
138    //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
139    if( (ch==0x002D) ||
140        (0x0030 <= ch && ch <= 0x0039) ||
141        (0x0041 <= ch && ch <= 0x005A) ||
142        (0x0061 <= ch && ch <= 0x007A)
143      ){
144        return TRUE;
145    }
146    return FALSE;
147}
148
149#endif /* #if !UCONFIG_NO_TRANSLITERATION */
150#else
/*
 * Empty stand-in declared in the #else branch of the UCONFIG_NO_IDNA check,
 * so that the class name still exists when the full declaration is compiled out.
 */
class NamePrepTransform {};
153#endif /* #if !UCONFIG_NO_IDNA */
154
155#endif
156
157/*
158 * Hey, Emacs, please set the following:
159 *
160 * Local Variables:
161 * indent-tabs-mode: nil
162 * End:
163 *
164 */
165