1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19package org.apache.xml.serializer.utils;
20
21import java.util.Arrays;
22
/**
 * Derived from the Xerces-2J class org.apache.xerces.util.XML11Char.
 *
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * Classification of code points in the Basic Multilingual Plane is driven
 * by a private flag table holding one byte per code point; each bit of
 * that byte corresponds to one of the public <code>MASK_XML11_*</code>
 * constants. Code points from 0x10000 upwards are not stored in the table
 * and are classified by range checks inside the convenience methods.
 * <p>
 * Every method taking an <code>int</code> accepts any value: arguments
 * that are negative or greater than 0x10FFFF are simply reported as not
 * belonging to the tested character class.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Neil Graham, IBM
 * @author Michael Glavassevich, IBM
 *
 * @version $Id: $
 */
public class XML11Char {

    //
    // Constants
    //

    /** Number of entries in the flag table: one per BMP code point. */
    private static final int TABLE_SIZE = 1 << 16;

    /** First code point outside the Basic Multilingual Plane. */
    private static final int SUPPLEMENTARY_MIN = 0x10000;

    /** Last Unicode code point. */
    private static final int SUPPLEMENTARY_MAX = 0x10FFFF;

    /**
     * Exclusive upper bound of the supplementary code points accepted in
     * names; planes 15 and 16 (0xF0000 and above) are excluded.
     */
    private static final int SUPPLEMENTARY_NAME_LIMIT = 0xF0000;

    /** Character flags for XML 1.1, indexed by BMP code point. */
    private static final byte XML11CHARS [] = new byte [TABLE_SIZE];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Entries that are never assigned keep the default value 0 (no
        // flag set): index 0, the surrogate range 55296-57343 and the
        // last two entries 65534 and 65535.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Tests whether a BMP code point has at least one of the bits of
     * <code>mask</code> set in the flag table. Values outside the table,
     * including negative ones, yield false instead of an
     * ArrayIndexOutOfBoundsException.
     */
    private static boolean hasFlag(int c, int mask) {
        return 0 <= c && c < TABLE_SIZE && (XML11CHARS[c] & mask) != 0;
    } // hasFlag(int,int):boolean

    /** Tests whether the value is a code point in the range 0x10000 to 0x10FFFF. */
    private static boolean isSupplementary(int c) {
        return SUPPLEMENTARY_MIN <= c && c <= SUPPLEMENTARY_MAX;
    } // isSupplementary(int):boolean

    /** Tests whether the value is a code point in the range 0x10000 to 0xEFFFF. */
    private static boolean isSupplementaryNameChar(int c) {
        return SUPPLEMENTARY_MIN <= c && c < SUPPLEMENTARY_NAME_LIMIT;
    } // isSupplementaryNameChar(int):boolean

    /**
     * Decodes the code point starting at <code>index</code> for the purpose
     * of name validation.
     *
     * @return the char itself when it is not a name high surrogate (lone or
     *         out-of-range surrogates are returned as is and are rejected
     *         later by the flag table); the combined supplementary code
     *         point when a name high surrogate is followed by a low
     *         surrogate; -1 when a name high surrogate is not followed by
     *         a low surrogate.
     */
    private static int nameCodePointAt(String s, int index) {
        final char ch = s.charAt(index);
        if (!isXML11NameHighSurrogate(ch)) {
            return ch;
        }
        final int next = index + 1;
        if (next < s.length()) {
            final char low = s.charAt(next);
            if (Character.isLowSurrogate(low)) {
                return Character.toCodePoint(ch, low);
            }
        }
        // Dangling high surrogate: -1 fails every character class test.
        return -1;
    } // nameCodePointAt(String,int):int

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplementary range from 0x10000 to 0x10FFFF,
     * which is not covered by the flag table.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid, that is,
     * exactly when {@link #isXML11Valid(int)} returns false.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method accepts the same characters as
     * {@link #isXML11Valid(int)} except those flagged with
     * <code>MASK_XML11_CONTROL</code>, for which it returns false.
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
            || isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isSupplementaryNameChar(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isSupplementaryNameChar(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isSupplementaryNameChar(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isSupplementaryNameChar(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false for the empty string
     * @throws NullPointerException if name is null
     */
    public static boolean isXML11ValidName(String name) {
        final int length = name.length();
        if (length == 0) {
            return false;
        }
        int cp = nameCodePointAt(name, 0);
        if (!isXML11NameStart(cp)) {
            return false;
        }
        // Advance by one char, or by two when a surrogate pair was consumed.
        int i = Character.charCount(cp);
        while (i < length) {
            cp = nameCodePointAt(name, i);
            if (!isXML11Name(cp)) {
                return false;
            }
            i += Character.charCount(cp);
        }
        return true;
    } // isXML11ValidName(String):boolean


    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false for the empty string
     * @throws NullPointerException if ncName is null
     */
    public static boolean isXML11ValidNCName(String ncName) {
        final int length = ncName.length();
        if (length == 0) {
            return false;
        }
        int cp = nameCodePointAt(ncName, 0);
        if (!isXML11NCNameStart(cp)) {
            return false;
        }
        // Advance by one char, or by two when a surrogate pair was consumed.
        int i = Character.charCount(cp);
        while (i < length) {
            cp = nameCodePointAt(ncName, i);
            if (!isXML11NCName(cp)) {
                return false;
            }
            i += Character.charCount(cp);
        }
        return true;
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for the empty string
     * @throws NullPointerException if nmtoken is null
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        final int length = nmtoken.length();
        if (length == 0) {
            return false;
        }
        int i = 0;
        while (i < length) {
            final int cp = nameCodePointAt(nmtoken, i);
            if (!isXML11Name(cp)) {
                return false;
            }
            // Advance by one char, or by two when a surrogate pair was consumed.
            i += Character.charCount(cp);
        }
        return true;
    } // isXML11ValidNmtoken(String):boolean

} // class XML11Char
414
415