1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id$
20 */
21
22package org.apache.xml.utils;
23
24/**
25 * This class defines the basic XML character properties. The data
26 * in this class can be used to verify that a character is a valid
27 * XML character or if the character is a space, name start, or name
28 * character.
29 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. The table of character properties
 * (<code>CHARS</code>) is private to this class, so callers query it
 * through the convenience methods rather than indexing it and
 * applying a mask flag (e.g. <code>MASK_VALID</code>) themselves.
 * The table only covers characters below 0x10000; check the comments
 * for the <code>isValid</code> method for details on how supplemental
 * characters are treated.
38 *
39 * @author Glenn Marcy, IBM
40 * @author Andy Clark, IBM
41 * @author Eric Ye, IBM
42 * @author Arnaud  Le Hors, IBM
43 * @author Rahul Srivastava, Sun Microsystems Inc.
44 *
45 * @version $Id: XMLChar.java,v 1.7 2002/01/29 01:15:18 lehors Exp $
46 */
public class XMLChar {

    //
    // Constants
    //

    /**
     * Character flags, indexed by UTF-16 code unit (0x0000-0xFFFF). Each
     * entry is a bit set built from the <code>MASK_*</code> constants below.
     */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        final int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        final int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        final int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        final int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        final int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        final int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        final int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        final int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        final int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        final int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        final int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        final int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        final int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        final int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        final int nameStartFlags = MASK_NAME_START | MASK_NAME |
                                   MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        markRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters from the content set (they stay valid)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        markChars(spaceChar, MASK_SPACE);

        // set name start characters
        markChars(nameStartChar, nameStartFlags);
        markRanges(letterRange, nameStartFlags);
        markChars(letterChar, nameStartFlags);

        // set name characters
        markChars(nameChar, nameFlags);
        markRanges(digitRange, nameFlags);
        markRanges(combiningCharRange, nameFlags);
        markChars(combiningCharChar, nameFlags);
        markRanges(extenderRange, nameFlags);
        markChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars;
        // this must run after the name tables above have been applied
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        markChars(pubidChar, MASK_PUBID);
        markRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static helpers
    //

    /**
     * ORs <code>flags</code> into the table entry of every character in
     * each inclusive range of <code>ranges</code>.
     *
     * @param ranges Flat list of (first, last) pairs, both ends inclusive.
     * @param flags  The mask bits to set.
     */
    private static void markRanges(int[] ranges, int flags) {
        for (int i = 0; i < ranges.length; i += 2) {
            final int last = ranges[i + 1];
            for (int c = ranges[i]; c <= last; c++) {
                CHARS[c] |= flags;
            }
        }
    }

    /**
     * ORs <code>flags</code> into the table entry of every character
     * listed in <code>chars</code>.
     *
     * @param chars The individual characters to mark.
     * @param flags The mask bits to set.
     */
    private static void markChars(int[] chars, int flags) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= flags;
        }
    }

    /**
     * Returns true if <code>c</code> lies inside the table (0x0000-0xFFFF)
     * and has at least one bit of <code>mask</code> set. Values outside
     * the table, including negative ones, yield false instead of an
     * <code>ArrayIndexOutOfBoundsException</code>.
     *
     * @param c    The character to look up.
     * @param mask The mask bits to test.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if <code>c</code> is one of the characters accepted
     * anywhere in an encoding name by this class: an ASCII letter, an
     * ASCII digit, '.', '_' or '-'.
     *
     * @param c The character to check.
     */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
               (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character,
     * i.e. lies in the range 0x10000 to 0x10FFFF.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked to be surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character.
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character.
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     * (0xD800 to 0xDBFF).
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     * (0xDC00 to 0xDFFF).
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplemental character range from 0x10000 to
     * 0x10FFFF, which is not stored in the character table. Negative
     * values and values above 0x10FFFF are reported as not valid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Every supplemental character (0x10000 to 0x10FFFF) is content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false otherwise, including
     *         when name is null or empty
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        final int length = name.length();
        for (int i = 1; i < length; i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false otherwise, including
     *         when ncName is null or empty
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        final int length = ncName.length();
        for (int i = 1; i < length; i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false otherwise,
     *         including when nmtoken is null or empty
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        final int length = nmtoken.length();
        for (int i = 0; i < length; i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, ASCII digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name; null yields false.
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        // the first character is restricted to letters
        final char first = ianaEncoding.charAt(0);
        if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
            return false;
        }
        final int length = ianaEncoding.length();
        for (int i = 1; i < length; i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, ASCII digits,
     * '.', '_' or '-'. Unlike an IANA name, a Java name may start
     * with any of those characters (e.g. <code>8859_1</code>).
     *
     * @param javaEncoding The Java encoding name; null yields false.
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        final int length = javaEncoding.length();
        // start at index 0 so that the first character is validated too
        for (int i = 0; i < length; i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if a qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     *
     * @param str The qualified name to check; null yields false.
     * @return true if <code>str</code> is either a single NCName or two
     *         NCNames separated by exactly one ':'
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // no prefix: the whole string must be an NCName
        if (colon < 0) {
            return isValidNCName(str);
        }

        // a leading or trailing colon leaves an empty prefix or local part
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        // any further ':' ends up in the local part and is rejected there
        return isValidNCName(str.substring(0, colon))
            && isValidNCName(str.substring(colon + 1));
    } // isValidQName(String):boolean

} // class XMLChar
669