19f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson/* 29f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Licensed to the Apache Software Foundation (ASF) under one 39f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * or more contributor license agreements. See the NOTICE file 49f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * distributed with this work for additional information 59f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * regarding copyright ownership. The ASF licenses this file 69f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * to you under the Apache License, Version 2.0 (the "License"); 79f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * you may not use this file except in compliance with the License. 89f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * You may obtain a copy of the License at 99f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * http://www.apache.org/licenses/LICENSE-2.0 119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Unless required by applicable law or agreed to in writing, software 139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * distributed under the License is distributed on an "AS IS" BASIS, 149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * See the License for the specific language governing permissions and 169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * limitations under the License. 179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson/* 199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * $Id$ 209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilsonpackage org.apache.xml.utils; 239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson/** 259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * This class defines the basic XML character properties. The data 269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * in this class can be used to verify that a character is a valid 279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * XML character or if the character is a space, name start, or name 289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character. 299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <p> 309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * A series of convenience methods are supplied to ease the burden 319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * of the developer. Because inlining the checks can improve per 329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character performance, the tables of character properties are 339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * public. Using the character as an index into the <code>CHARS</code> 349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * array and applying the appropriate mask flag (e.g. 359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <code>MASK_VALID</code>), yields the same results as calling the 369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * convenience methods. There is one exception: check the comments 379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * for the <code>isValid</code> method for details. 389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Glenn Marcy, IBM 409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Andy Clark, IBM 419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Eric Ye, IBM 429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Arnaud Le Hors, IBM 439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @author Rahul Srivastava, Sun Microsystems Inc. 449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @version $Id: XMLChar.java,v 1.7 2002/01/29 01:15:18 lehors Exp $ 469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilsonpublic class XMLChar { 489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // Constants 519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** Character flags. */ 549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson private static final byte[] CHARS = new byte[1 << 16]; 559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** Valid character mask. */ 579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_VALID = 0x01; 589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** Space character mask. */ 609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_SPACE = 0x02; 619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** Name start character mask. */ 639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_NAME_START = 0x04; 649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** Name character mask. */ 669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_NAME = 0x08; 679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** Pubid character mask. */ 699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_PUBID = 0x10; 709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Content character mask. Special characters are those that can 739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * be considered the start of markup, such as '<' and '&'. 749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * The various newline characters are considered special as well. 759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * All other valid XML characters can be considered content. 769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <p> 779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * This is an optimization for the inner loop of character scanning. 789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_CONTENT = 0x20; 809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** NCName start character mask. */ 829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_NCNAME_START = 0x40; 839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** NCName character mask. */ 859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static final int MASK_NCNAME = 0x80; 869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // Static initialization 899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson static { 929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | 959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [#xE000-#xFFFD] | [#x10000-#x10FFFF] 969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int charRange[] = { 999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD, 1009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [3] S ::= (#x20 | #x9 | #xD | #xA)+ 1049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int spaceChar[] = { 1079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0020, 0x0009, 0x000D, 0x000A, 1089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 1129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // CombiningChar | Extender 1139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int nameChar[] = { 1169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x002D, 0x002E, // '-' and '.' 1179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [5] Name ::= (Letter | '_' | ':') (NameChar)* 1219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int nameStartChar[] = { 1249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x003A, 0x005F, // ':' and '_' 1259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 1299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int pubidChar[] = { 1329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D, 1339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x005F 1349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int pubidRange[] = { 1379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A 1389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [84] Letter ::= BaseChar | Ideographic 1429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 1439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 1449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int letterRange[] = { 1459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // BaseChar 1469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6, 1479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E, 1489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217, 1499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1, 1509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C, 1519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4, 1529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5, 1539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA, 1549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7, 1559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6, 1569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990, 1579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD, 1589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10, 1599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36, 1609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B, 1619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3, 1629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28, 1639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D, 1649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95, 1659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA, 1669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10, 1679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61, 1689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3, 1699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10, 1709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E, 1719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88, 1729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB, 1739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47, 1749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103, 1759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155, 1769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF, 1779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9, 1789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D, 1799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC, 1809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB, 1819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B, 1829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C, 1839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0xAC00, 0xD7A3, 1849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // Ideographic 1859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x3021, 0x3029, 0x4E00, 0x9FA5, 1869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int letterChar[] = { 1889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // BaseChar 1899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5, 1909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C, 1919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0, 1929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 1939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E, 1949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B, 1959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x1F5D, 0x1FBE, 0x2126, 0x212E, 1969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // Ideographic 1979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x3007, 1989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 1999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [87] CombiningChar ::= ... 2029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int combiningCharRange[] = { 2059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1, 2069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652, 2079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8, 2089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954, 2099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8, 2109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48, 2119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5, 2129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43, 2139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83, 2149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03, 2159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 2169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD, 2179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48, 2189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9, 2199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84, 2209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7, 2219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x20D0, 0x20DC, 0x302A, 0x302F, 2229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 2239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int combiningCharChar[] = { 2259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF, 2269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7, 2279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, 2289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A, 2299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 2309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [88] Digit ::= ... 2339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int digitRange[] = { 2369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F, 2379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F, 2389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F, 2399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 2409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 2419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // [89] Extender ::= ... 2449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int extenderRange[] = { 2479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE, 2489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 2499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int extenderChar[] = { 2519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, 2529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 2539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // SpecialChar ::= '<', '&', '\n', '\r', ']' 2569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int specialChar[] = { 2599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson '<', '&', '\n', '\r', ']', 2609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson }; 2619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // Initialize 2649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 2659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // set valid characters 2679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < charRange.length; i += 2) { 2689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int j = charRange[i]; j <= charRange[i + 1]; j++) { 2699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[j] |= MASK_VALID | MASK_CONTENT; 2709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // remove special characters 2749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < specialChar.length; i++) { 2759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT); 2769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // set space characters 2799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < spaceChar.length; i++) { 2809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[spaceChar[i]] |= MASK_SPACE; 2819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // set name start characters 2849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < nameStartChar.length; i++) { 2859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[nameStartChar[i]] |= MASK_NAME_START | MASK_NAME | 2869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson MASK_NCNAME_START | MASK_NCNAME; 2879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < letterRange.length; i += 2) { 2899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int j = letterRange[i]; j <= letterRange[i + 1]; j++) { 2909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[j] |= MASK_NAME_START | MASK_NAME | 2919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson MASK_NCNAME_START | MASK_NCNAME; 2929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < letterChar.length; i++) { 2959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[letterChar[i]] |= MASK_NAME_START | MASK_NAME | 2969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson MASK_NCNAME_START | MASK_NCNAME; 2979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 2989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 2999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // set name characters 3009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < nameChar.length; i++) { 3019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[nameChar[i]] |= MASK_NAME | MASK_NCNAME; 3029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < digitRange.length; i += 2) { 3049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int j = digitRange[i]; j <= digitRange[i + 1]; j++) { 3059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[j] |= MASK_NAME | MASK_NCNAME; 3069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < combiningCharRange.length; i += 2) { 3099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int j = combiningCharRange[i]; j <= combiningCharRange[i + 1]; j++) { 3109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[j] |= MASK_NAME | MASK_NCNAME; 3119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < combiningCharChar.length; i++) { 3149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[combiningCharChar[i]] |= MASK_NAME | MASK_NCNAME; 3159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < extenderRange.length; i += 2) { 3179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int j = extenderRange[i]; j <= extenderRange[i + 1]; j++) { 3189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[j] |= MASK_NAME | MASK_NCNAME; 3199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < extenderChar.length; i++) { 3229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[extenderChar[i]] |= MASK_NAME | MASK_NCNAME; 3239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars 3269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME); 3279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // set Pubid characters 3299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < pubidChar.length; i++) { 3309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[pubidChar[i]] |= MASK_PUBID; 3319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < pubidRange.length; i += 2) { 3339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int j = pubidRange[i]; j <= pubidRange[i + 1]; j++) { 3349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson CHARS[j] |= MASK_PUBID; 3359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // <clinit>() 3399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 3419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // Public static methods 3429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // 3439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 3459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a supplemental character. 3469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 3479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 3489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 3499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isSupplemental(int c) { 3509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (c >= 0x10000 && c <= 0x10FFFF); 3519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 3549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true the supplemental character corresponding to the given 3559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * surrogates. 3569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 3579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param h The high surrogate. 3589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param l The low surrogate. 3599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 3609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static int supplemental(char h, char l) { 3619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000; 3629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 3659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns the high surrogate of a supplemental character 3669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 3679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The supplemental character to "split". 3689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 3699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static char highSurrogate(int c) { 3709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (char) (((c - 0x00010000) >> 10) + 0xD800); 3719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 3749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns the low surrogate of a supplemental character 3759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 3769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The supplemental character to "split". 3779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 3789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static char lowSurrogate(int c) { 3799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00); 3809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 3839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns whether the given character is a high surrogate 3849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 3859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 3869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 3879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isHighSurrogate(int c) { 3889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (0xD800 <= c && c <= 0xDBFF); 3899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 3919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 3929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns whether the given character is a low surrogate 3939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 3949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 3959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 3969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isLowSurrogate(int c) { 3979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (0xDC00 <= c && c <= 0xDFFF); 3989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 3999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is valid. This method 4039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * also checks the surrogate character range from 0x10000 to 0x10FFFF. 4049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <p> 4059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * If the program chooses to apply the mask directly to the 4069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <code>CHARS</code> array, then they are responsible for checking 4079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * the surrogate character range. 4089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValid(int c) { 4129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (c < 0x10000 && (CHARS[c] & MASK_VALID) != 0) || 4139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson (0x10000 <= c && c <= 0x10FFFF); 4149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isValid(int):boolean 4159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is invalid. 4189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isInvalid(int c) { 4229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return !isValid(c); 4239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isInvalid(int):boolean 4249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character can be considered content. 4279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isContent(int c) { 4319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return (c < 0x10000 && (CHARS[c] & MASK_CONTENT) != 0) || 4329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson (0x10000 <= c && c <= 0x10FFFF); 4339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isContent(int):boolean 4349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character can be considered markup. 4379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Markup characters include '<', '&', and '%'. 4389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isMarkup(int c) { 4429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c == '<' || c == '&' || c == '%'; 4439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isMarkup(int):boolean 4449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a space character 4479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * as defined by production [3] in the XML 1.0 specification. 4489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isSpace(int c) { 4529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0; 4539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isSpace(int):boolean 4549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a valid name start 4579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character as defined by production [5] in the XML 1.0 4589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * specification. 4599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isNameStart(int c) { 4639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0; 4649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isNameStart(int):boolean 4659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a valid name 4689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character as defined by production [4] in the XML 1.0 4699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * specification. 4709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isName(int c) { 4749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c < 0x10000 && (CHARS[c] & MASK_NAME) != 0; 4759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isName(int):boolean 4769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a valid NCName start 4799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character as defined by production [4] in Namespaces in XML 4809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * recommendation. 4819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isNCNameStart(int c) { 4859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c < 0x10000 && (CHARS[c] & MASK_NCNAME_START) != 0; 4869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isNCNameStart(int):boolean 4879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 4899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a valid NCName 4909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character as defined by production [5] in Namespaces in XML 4919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * recommendation. 4929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 4939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 4949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 4959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isNCName(int c) { 4969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c < 0x10000 && (CHARS[c] & MASK_NCNAME) != 0; 4979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isNCName(int):boolean 4989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 4999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 5009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the specified character is a valid Pubid 5019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * character as defined by production [13] in the XML 1.0 5029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * specification. 5039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 5049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param c The character to check. 5059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isPubid(int c) { 5079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return c < 0x10000 && (CHARS[c] & MASK_PUBID) != 0; 5089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isPubid(int):boolean 5099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /* 5119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * [5] Name ::= (Letter | '_' | ':') (NameChar)* 5129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 5149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Check to see if a string is a valid Name according to [5] 5159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * in the XML 1.0 Recommendation 5169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 5179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param name string to check 5189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @return true if name is a valid Name 5199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValidName(String name) { 5219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (name.length() == 0) 5229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson char ch = name.charAt(0); 5249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if( isNameStart(ch) == false) 5259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 1; i < name.length(); i++ ) { 5279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson ch = name.charAt(i); 5289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if( isName( ch ) == false ){ 5299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 5319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 5329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return true; 5339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isValidName(String):boolean 5349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /* 5379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * from the namespace rec 5389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * [4] NCName ::= (Letter | '_') (NCNameChar)* 5399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 5419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Check to see if a string is a valid NCName according to [4] 5429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * from the XML Namespaces 1.0 Recommendation 5439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 5449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param ncName string to check 5459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @return true if name is a valid NCName 5469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValidNCName(String ncName) { 5489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (ncName.length() == 0) 5499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson char ch = ncName.charAt(0); 5519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if( isNCNameStart(ch) == false) 5529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 1; i < ncName.length(); i++ ) { 5549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson ch = ncName.charAt(i); 5559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if( isNCName( ch ) == false ){ 5569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 5589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 5599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return true; 5609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isValidNCName(String):boolean 5619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /* 5639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * [7] Nmtoken ::= (NameChar)+ 5649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 5669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Check to see if a string is a valid Nmtoken according to [7] 5679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * in the XML 1.0 Recommendation 5689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 5699f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param nmtoken string to check 5709f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @return true if nmtoken is a valid Nmtoken 5719f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5729f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValidNmtoken(String nmtoken) { 5739f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (nmtoken.length() == 0) 5749f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5759f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 0; i < nmtoken.length(); i++ ) { 5769f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson char ch = nmtoken.charAt(i); 5779f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if( ! isName( ch ) ){ 5789f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 5799f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 5809f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 5819f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return true; 5829f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isValidName(String):boolean 5839f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5849f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5859f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5869f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5879f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5889f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson // encodings 5899f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 5909f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 5919f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the encoding name is a valid IANA encoding. 5929f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * This method does not verify that there is a decoder available 5939f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * for this encoding, only that the characters are valid for an 5949f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * IANA encoding name. 5959f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 5969f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param ianaEncoding The IANA encoding name. 5979f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 5989f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValidIANAEncoding(String ianaEncoding) { 5999f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (ianaEncoding != null) { 6009f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int length = ianaEncoding.length(); 6019f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (length > 0) { 6029f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson char c = ianaEncoding.charAt(0); 6039f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 6049f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 1; i < length; i++) { 6059f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson c = ianaEncoding.charAt(i); 6069f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 6079f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson (c < '0' || c > '9') && c != '.' && c != '_' && 6089f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson c != '-') { 6099f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 6109f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6119f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6129f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return true; 6139f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6149f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6159f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6169f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 6179f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isValidIANAEncoding(String):boolean 6189f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 6199f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 6209f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Returns true if the encoding name is a valid Java encoding. 6219f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * This method does not verify that there is a decoder available 6229f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * for this encoding, only that the characters are valid for an 6239f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Java encoding name. 6249f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * 6259f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * @param javaEncoding The Java encoding name. 6269f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 6279f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValidJavaEncoding(String javaEncoding) { 6289f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (javaEncoding != null) { 6299f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson int length = javaEncoding.length(); 6309f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (length > 0) { 6319f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson for (int i = 1; i < length; i++) { 6329f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson char c = javaEncoding.charAt(i); 6339f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 6349f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson (c < '0' || c > '9') && c != '.' && c != '_' && 6359f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson c != '-') { 6369f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 6379f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6389f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6399f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return true; 6409f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6419f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6429f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 6439f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } // isValidIANAEncoding(String):boolean 6449f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 6459f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson /** 6469f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * Simple check to determine if qname is legal. If it returns false 6479f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * then <param>str</param> is illegal; if it returns true then 6489f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson * <param>str</param> is legal. 6499f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson */ 6509f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson public static boolean isValidQName(String str) { 6519f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 6529f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson final int colon = str.indexOf(':'); 6539f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 6549f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (colon == 0 || colon == str.length() - 1) { 6559f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return false; 6569f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6579f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 6589f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson if (colon > 0) { 6599f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson final String prefix = str.substring(0,colon); 6609f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson final String localPart = str.substring(colon+1); 6619f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return isValidNCName(prefix) && isValidNCName(localPart); 6629f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6639f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson else { 6649f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson return isValidNCName(str); 6659f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6669f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson } 6679f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson 6689f8118474e9513f7a5b7d2a05e4a0fb15d1a6569Jesse Wilson} // class XMLChar 669