19f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson/*
29f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Licensed to the Apache Software Foundation (ASF) under one
39f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * or more contributor license agreements. See the NOTICE file
49f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * distributed with this work for additional information
59f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * regarding copyright ownership. The ASF licenses this file
69f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * to you under the Apache License, Version 2.0 (the  "License");
79f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * you may not use this file except in compliance with the License.
89f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * You may obtain a copy of the License at
99f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson *
109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson *     http://www.apache.org/licenses/LICENSE-2.0
119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson *
129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Unless required by applicable law or agreed to in writing, software
139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * distributed under the License is distributed on an "AS IS" BASIS,
149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * See the License for the specific language governing permissions and
169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * limitations under the License.
179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */
189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson/*
199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * $Id$
209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */
219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilsonpackage org.apache.xml.utils;
239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson/**
259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * This class defines the basic XML character properties. The data
269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * in this class can be used to verify that a character is a valid
279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * XML character or if the character is a space, name start, or name
289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character.
299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. The mask constants (e.g. <code>MASK_VALID</code>)
 * are public, but the <code>CHARS</code> table they apply to is
 * private to this class, so callers must use the convenience methods
 * rather than indexing the table themselves. Within this class, using
 * the character as an index into the <code>CHARS</code> array and
 * applying the appropriate mask flag yields the same results as calling
 * the convenience methods. There is one exception: check the comments
 * for the <code>isValid</code> method for details.
389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson *
399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Glenn Marcy, IBM
409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Andy Clark, IBM
419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Eric Ye, IBM
429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Arnaud  Le Hors, IBM
439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Rahul Srivastava, Sun Microsystems Inc.
449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson *
459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @version $Id: XMLChar.java,v 1.7 2002/01/29 01:15:18 lehors Exp $
469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */
479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilsonpublic class XMLChar {
489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    //
509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    // Constants
519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    //
529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
/**
 * Character flags, indexed by 16-bit character value. Each entry is a
 * bitwise OR of the <code>MASK_*</code> constants below; the table is
 * filled in once by the static initializer.
 */
private static final byte[] CHARS = new byte[1 << 16];

/** Valid character mask. */
public static final int MASK_VALID = 0x01;

/** Space character mask. */
public static final int MASK_SPACE = 0x02;

/** Name start character mask. */
public static final int MASK_NAME_START = 0x04;

/** Name character mask. */
public static final int MASK_NAME = 0x08;

/** Pubid character mask. */
public static final int MASK_PUBID = 0x10;

/**
 * Content character mask. Special characters are those that can
 * be considered the start of markup, such as '&lt;' and '&amp;'.
 * The various newline characters are considered special as well.
 * All other valid XML characters can be considered content.
 * <p>
 * This is an optimization for the inner loop of character scanning.
 */
public static final int MASK_CONTENT = 0x20;

/** NCName start character mask. */
public static final int MASK_NCNAME_START = 0x40;

/** NCName character mask. */
public static final int MASK_NCNAME = 0x80;

//
// Static initialization
//

/*
 * Builds the CHARS table. Tables named "...Range" hold inclusive
 * (first, last) pairs; tables named "...Char" hold single characters.
 * The order of the steps matters only around the two "remove" steps,
 * which must run after the flags they clear have been set.
 */
static {

    //
    // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
    //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    //
    // Only the part below #x10000 fits in the table.
    //

    int[] charRange = {
        0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
    };

    //
    // [3] S ::= (#x20 | #x9 | #xD | #xA)+
    //

    int[] spaceChar = {
        0x0020, 0x0009, 0x000D, 0x000A,
    };

    //
    // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
    //                  CombiningChar | Extender
    //

    int[] nameChar = {
        0x002D, 0x002E, // '-' and '.'
    };

    //
    // [5] Name ::= (Letter | '_' | ':') (NameChar)*
    //

    int[] nameStartChar = {
        0x003A, 0x005F, // ':' and '_'
    };

    //
    // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
    //

    int[] pubidChar = {
        0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
        0x005F
    };

    int[] pubidRange = {
        0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
    };

    //
    // [84] Letter ::= BaseChar | Ideographic
    //

    int[] letterRange = {
        // BaseChar
        0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
        0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
        0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
        0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
        0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
        0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
        0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
        0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
        0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
        0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
        0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
        0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
        0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
        0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
        0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
        0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
        0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
        0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
        0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
        0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
        0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
        0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
        0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
        0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
        0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
        0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
        0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
        0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
        0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
        0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
        0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
        0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
        0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
        0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
        0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
        0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
        0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
        0xAC00, 0xD7A3,
        // Ideographic
        0x3021, 0x3029, 0x4E00, 0x9FA5,
    };
    int[] letterChar = {
        // BaseChar
        0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
        0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
        0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
        0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
        0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
        0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
        0x1F5D, 0x1FBE, 0x2126, 0x212E,
        // Ideographic
        0x3007,
    };

    //
    // [87] CombiningChar ::= ...
    //

    int[] combiningCharRange = {
        0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
        0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
        0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
        0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
        0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
        0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
        0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
        0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
        0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
        0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
        0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
        0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
        0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
        0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
        0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
        0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
        0x20D0, 0x20DC, 0x302A, 0x302F,
    };

    int[] combiningCharChar = {
        0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
        0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
        0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
        0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
    };

    //
    // [88] Digit ::= ...
    //

    int[] digitRange = {
        0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
        0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
        0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
        0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
    };

    //
    // [89] Extender ::= ...
    //

    int[] extenderRange = {
        0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
    };

    int[] extenderChar = {
        0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
    };

    //
    // SpecialChar ::= '<', '&', '\n', '\r', ']'
    //

    int[] specialChar = {
        '<', '&', '\n', '\r', ']',
    };

    //
    // Initialize
    //

    // set valid characters; every valid character starts out as content
    addFlagsToRanges(charRange, MASK_VALID | MASK_CONTENT);

    // remove special characters from the content set (they stay valid)
    for (int i = 0; i < specialChar.length; i++) {
        CHARS[specialChar[i]] &= ~MASK_CONTENT;
    }

    // set space characters
    addFlagsToChars(spaceChar, MASK_SPACE);

    // set name start characters; a name start character is also a name character
    final int nameStartFlags = MASK_NAME_START | MASK_NAME
            | MASK_NCNAME_START | MASK_NCNAME;
    addFlagsToChars(nameStartChar, nameStartFlags);
    addFlagsToRanges(letterRange, nameStartFlags);
    addFlagsToChars(letterChar, nameStartFlags);

    // set name characters
    final int nameFlags = MASK_NAME | MASK_NCNAME;
    addFlagsToChars(nameChar, nameFlags);
    addFlagsToRanges(digitRange, nameFlags);
    addFlagsToRanges(combiningCharRange, nameFlags);
    addFlagsToChars(combiningCharChar, nameFlags);
    addFlagsToRanges(extenderRange, nameFlags);
    addFlagsToChars(extenderChar, nameFlags);

    // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
    CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

    // set Pubid characters
    addFlagsToChars(pubidChar, MASK_PUBID);
    addFlagsToRanges(pubidRange, MASK_PUBID);

} // <clinit>()

/** ORs <code>flags</code> into the CHARS entry of every character in each inclusive (first, last) pair. */
private static void addFlagsToRanges(int[] ranges, int flags) {
    for (int i = 0; i < ranges.length; i += 2) {
        for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
            CHARS[c] |= flags;
        }
    }
}

/** ORs <code>flags</code> into the CHARS entry of every listed character. */
private static void addFlagsToChars(int[] chars, int flags) {
    for (int i = 0; i < chars.length; i++) {
        CHARS[chars[i]] |= flags;
    }
}
3399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    //
3419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    // Public static methods
3429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    //
3439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
3459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a supplemental character.
3469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
3479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
3489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
3499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isSupplemental(int c) {
3509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (c >= 0x10000 && c <= 0x10FFFF);
3519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
3529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
3549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true the supplemental character corresponding to the given
3559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * surrogates.
3569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
3579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param h The high surrogate.
3589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param l The low surrogate.
3599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
3609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static int supplemental(char h, char l) {
3619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
3629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
3639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
3659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns the high surrogate of a supplemental character
3669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
3679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The supplemental character to "split".
3689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
3699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static char highSurrogate(int c) {
3709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (char) (((c - 0x00010000) >> 10) + 0xD800);
3719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
3729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
3749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns the low surrogate of a supplemental character
3759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
3769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The supplemental character to "split".
3779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
3789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static char lowSurrogate(int c) {
3799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
3809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
3819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
3839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns whether the given character is a high surrogate
3849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
3859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
3869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
3879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isHighSurrogate(int c) {
3889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (0xD800 <= c && c <= 0xDBFF);
3899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
3909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
3919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
3929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns whether the given character is a low surrogate
3939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
3949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
3959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
3969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isLowSurrogate(int c) {
3979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (0xDC00 <= c && c <= 0xDFFF);
3989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
3999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is valid. This method
4039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * also checks the surrogate character range from 0x10000 to 0x10FFFF.
4049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * <p>
4059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * If the program chooses to apply the mask directly to the
4069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * <code>CHARS</code> array, then they are responsible for checking
4079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * the surrogate character range.
4089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValid(int c) {
4129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (c < 0x10000 && (CHARS[c] & MASK_VALID) != 0) ||
4139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson               (0x10000 <= c && c <= 0x10FFFF);
4149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isValid(int):boolean
4159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is invalid.
4189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isInvalid(int c) {
4229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return !isValid(c);
4239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isInvalid(int):boolean
4249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character can be considered content.
4279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isContent(int c) {
4319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return (c < 0x10000 && (CHARS[c] & MASK_CONTENT) != 0) ||
4329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson               (0x10000 <= c && c <= 0x10FFFF);
4339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isContent(int):boolean
4349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character can be considered markup.
4379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Markup characters include '&lt;', '&amp;', and '%'.
4389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isMarkup(int c) {
4429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c == '<' || c == '&' || c == '%';
4439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isMarkup(int):boolean
4449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a space character
4479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * as defined by production [3] in the XML 1.0 specification.
4489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isSpace(int c) {
4529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0;
4539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isSpace(int):boolean
4549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a valid name start
4579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * character as defined by production [5] in the XML 1.0
4589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * specification.
4599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isNameStart(int c) {
4639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0;
4649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isNameStart(int):boolean
4659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a valid name
4689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * character as defined by production [4] in the XML 1.0
4699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * specification.
4709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isName(int c) {
4749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c < 0x10000 && (CHARS[c] & MASK_NAME) != 0;
4759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isName(int):boolean
4769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a valid NCName start
4799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * character as defined by production [4] in Namespaces in XML
4809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * recommendation.
4819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isNCNameStart(int c) {
4859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c < 0x10000 && (CHARS[c] & MASK_NCNAME_START) != 0;
4869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isNCNameStart(int):boolean
4879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
4899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a valid NCName
4909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * character as defined by production [5] in Namespaces in XML
4919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * recommendation.
4929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
4939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
4949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
4959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isNCName(int c) {
4969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c < 0x10000 && (CHARS[c] & MASK_NCNAME) != 0;
4979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isNCName(int):boolean
4989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
4999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
5009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the specified character is a valid Pubid
5019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * character as defined by production [13] in the XML 1.0
5029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * specification.
5039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
5049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param c The character to check.
5059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isPubid(int c) {
5079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return c < 0x10000 && (CHARS[c] & MASK_PUBID) != 0;
5089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isPubid(int):boolean
5099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /*
5119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
5129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
5149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Check to see if a string is a valid Name according to [5]
5159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * in the XML 1.0 Recommendation
5169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
5179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param name string to check
5189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @return true if name is a valid Name
5199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValidName(String name) {
5219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if (name.length() == 0)
5229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            return false;
5239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        char ch = name.charAt(0);
5249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if( isNameStart(ch) == false)
5259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           return false;
5269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        for (int i = 1; i < name.length(); i++ ) {
5279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           ch = name.charAt(i);
5289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           if( isName( ch ) == false ){
5299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson              return false;
5309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           }
5319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        }
5329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return true;
5339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isValidName(String):boolean
5349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /*
5379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * from the namespace rec
5389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * [4] NCName ::= (Letter | '_') (NCNameChar)*
5399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
5419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Check to see if a string is a valid NCName according to [4]
5429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * from the XML Namespaces 1.0 Recommendation
5439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
5449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param ncName string to check
5459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @return true if name is a valid NCName
5469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValidNCName(String ncName) {
5489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if (ncName.length() == 0)
5499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            return false;
5509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        char ch = ncName.charAt(0);
5519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if( isNCNameStart(ch) == false)
5529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           return false;
5539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        for (int i = 1; i < ncName.length(); i++ ) {
5549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           ch = ncName.charAt(i);
5559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           if( isNCName( ch ) == false ){
5569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson              return false;
5579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           }
5589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        }
5599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return true;
5609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isValidNCName(String):boolean
5619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /*
5639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * [7] Nmtoken ::= (NameChar)+
5649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
5669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Check to see if a string is a valid Nmtoken according to [7]
5679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * in the XML 1.0 Recommendation
5689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
5699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param nmtoken string to check
5709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @return true if nmtoken is a valid Nmtoken
5719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValidNmtoken(String nmtoken) {
5739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if (nmtoken.length() == 0)
5749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            return false;
5759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        for (int i = 0; i < nmtoken.length(); i++ ) {
5769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           char ch = nmtoken.charAt(i);
5779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           if(  ! isName( ch ) ){
5789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson              return false;
5799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           }
5809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        }
5819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return true;
5829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isValidName(String):boolean
5839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    // encodings
5899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
5909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
5919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the encoding name is a valid IANA encoding.
5929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * This method does not verify that there is a decoder available
5939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * for this encoding, only that the characters are valid for an
5949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * IANA encoding name.
5959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
5969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param ianaEncoding The IANA encoding name.
5979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
5989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValidIANAEncoding(String ianaEncoding) {
5999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if (ianaEncoding != null) {
6009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            int length = ianaEncoding.length();
6019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            if (length > 0) {
6029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                char c = ianaEncoding.charAt(0);
6039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
6049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                    for (int i = 1; i < length; i++) {
6059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                        c = ianaEncoding.charAt(i);
6069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                        if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') &&
6079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                            (c < '0' || c > '9') && c != '.' && c != '_' &&
6089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                            c != '-') {
6099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                            return false;
6109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                        }
6119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                    }
6129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                    return true;
6139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                }
6149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            }
6159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        }
6169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return false;
6179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isValidIANAEncoding(String):boolean
6189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
6199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    /**
6209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Returns true if the encoding name is a valid Java encoding.
6219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * This method does not verify that there is a decoder available
6229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * for this encoding, only that the characters are valid for an
6239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Java encoding name.
6249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     *
6259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * @param javaEncoding The Java encoding name.
6269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
6279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValidJavaEncoding(String javaEncoding) {
6289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        if (javaEncoding != null) {
6299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            int length = javaEncoding.length();
6309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            if (length > 0) {
6319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                for (int i = 1; i < length; i++) {
6329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                    char c = javaEncoding.charAt(i);
6339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                    if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') &&
6349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                        (c < '0' || c > '9') && c != '.' && c != '_' &&
6359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                        c != '-') {
6369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                        return false;
6379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                    }
6389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                }
6399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson                return true;
6409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson            }
6419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        }
6429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson        return false;
6439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    } // isValidIANAEncoding(String):boolean
6449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
6459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson   /**
6469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * Simple check to determine if qname is legal. If it returns false
6479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * then <param>str</param> is illegal; if it returns true then
6489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     * <param>str</param> is legal.
6499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson     */
6509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    public static boolean isValidQName(String str) {
6519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
6529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       final int colon = str.indexOf(':');
6539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
6549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       if (colon == 0 || colon == str.length() - 1) {
6559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           return false;
6569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       }
6579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
6589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       if (colon > 0) {
6599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           final String prefix = str.substring(0,colon);
6609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           final String localPart = str.substring(colon+1);
6619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           return isValidNCName(prefix) && isValidNCName(localPart);
6629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       }
6639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       else {
6649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson           return isValidNCName(str);
6659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson       }
6669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson    }
6679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson
6689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson} // class XMLChar
669