DOMConfigurationImpl.java revision 5501a3d4b3d7657c183ed5446fe67fa011fbf70b
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package org.apache.harmony.xml.dom; 18 19import java.util.Map; 20import java.util.TreeMap; 21import org.apache.xml.serializer.dom3.DOMErrorImpl; 22import org.w3c.dom.DOMConfiguration; 23import org.w3c.dom.DOMError; 24import org.w3c.dom.DOMErrorHandler; 25import org.w3c.dom.DOMException; 26import org.w3c.dom.DOMStringList; 27import org.w3c.dom.NamedNodeMap; 28import org.w3c.dom.Node; 29 30/** 31 * A minimal implementation of DOMConfiguration. This implementation uses inner 32 * parameter instances to centralize each parameter's behavior. 33 */ 34public final class DOMConfigurationImpl implements DOMConfiguration { 35 36 private static final Map<String, Parameter> PARAMETERS 37 = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER); 38 39 static { 40 /* 41 * True to canonicalize the document (unsupported). This includes 42 * removing DocumentType nodes from the tree and removing unused 43 * namespace declarations. Setting this to true also sets these 44 * parameters: 45 * entities = false 46 * normalize-characters = false 47 * cdata-sections = false 48 * namespaces = true 49 * namespace-declarations = true 50 * well-formed = true 51 * element-content-whitespace = true 52 * Setting these parameters to another value shall revert the canonical 53 * form to false. 54 */ 55 PARAMETERS.put("canonical-form", new FixedParameter(false)); 56 57 /* 58 * True to keep existing CDATA nodes; false to replace them/merge them 59 * into adjacent text nodes. 60 */ 61 PARAMETERS.put("cdata-sections", new BooleanParameter() { 62 public Object get(DOMConfigurationImpl config) { 63 return config.cdataSections; 64 } 65 public void set(DOMConfigurationImpl config, Object value) { 66 config.cdataSections = (Boolean) value; 67 } 68 }); 69 70 /* 71 * True to check character normalization (unsupported). 72 */ 73 PARAMETERS.put("check-character-normalization", new FixedParameter(false)); 74 75 /* 76 * True to keep comments in the document; false to discard them. 77 */ 78 PARAMETERS.put("comments", new BooleanParameter() { 79 public Object get(DOMConfigurationImpl config) { 80 return config.comments; 81 } 82 public void set(DOMConfigurationImpl config, Object value) { 83 config.comments = (Boolean) value; 84 } 85 }); 86 87 /* 88 * True to expose schema normalized values. Setting this to true sets 89 * the validate parameter to true. Has no effect when validate is false. 90 */ 91 PARAMETERS.put("datatype-normalization", new BooleanParameter() { 92 public Object get(DOMConfigurationImpl config) { 93 return config.datatypeNormalization; 94 } 95 public void set(DOMConfigurationImpl config, Object value) { 96 if ((Boolean) value) { 97 config.datatypeNormalization = true; 98 config.validate = true; 99 } else { 100 config.datatypeNormalization = false; 101 } 102 } 103 }); 104 105 /* 106 * True to keep whitespace elements in the document; false to discard 107 * them (unsupported). 108 */ 109 PARAMETERS.put("element-content-whitespace", new FixedParameter(true)); 110 111 /* 112 * True to keep entity references in the document; false to expand them. 113 */ 114 PARAMETERS.put("entities", new BooleanParameter() { 115 public Object get(DOMConfigurationImpl config) { 116 return config.entities; 117 } 118 public void set(DOMConfigurationImpl config, Object value) { 119 config.entities = (Boolean) value; 120 } 121 }); 122 123 /* 124 * Handler to be invoked when errors are encountered. 125 */ 126 PARAMETERS.put("error-handler", new Parameter() { 127 public Object get(DOMConfigurationImpl config) { 128 return config.errorHandler; 129 } 130 public void set(DOMConfigurationImpl config, Object value) { 131 config.errorHandler = (DOMErrorHandler) value; 132 } 133 public boolean canSet(DOMConfigurationImpl config, Object value) { 134 return value == null || value instanceof DOMErrorHandler; 135 } 136 }); 137 138 /* 139 * Bulk alias to set the following parameter values: 140 * validate-if-schema = false 141 * entities = false 142 * datatype-normalization = false 143 * cdata-sections = false 144 * namespace-declarations = true 145 * well-formed = true 146 * element-content-whitespace = true 147 * comments = true 148 * namespaces = true. 149 * Querying this returns true if all of the above parameters have the 150 * listed values; false otherwise. 151 */ 152 PARAMETERS.put("infoset", new BooleanParameter() { 153 public Object get(DOMConfigurationImpl config) { 154 // validate-if-schema is always false 155 // element-content-whitespace is always true 156 // namespace-declarations is always true 157 return !config.entities 158 && !config.datatypeNormalization 159 && !config.cdataSections 160 && config.wellFormed 161 && config.comments 162 && config.namespaces; 163 } 164 public void set(DOMConfigurationImpl config, Object value) { 165 if ((Boolean) value) { 166 // validate-if-schema is always false 167 // element-content-whitespace is always true 168 // namespace-declarations is always true 169 config.entities = false; 170 config.datatypeNormalization = false; 171 config.cdataSections = false; 172 config.wellFormed = true; 173 config.comments = true; 174 config.namespaces = true; 175 } 176 } 177 }); 178 179 /* 180 * True to perform namespace processing; false for none. 181 */ 182 PARAMETERS.put("namespaces", new BooleanParameter() { 183 public Object get(DOMConfigurationImpl config) { 184 return config.namespaces; 185 } 186 public void set(DOMConfigurationImpl config, Object value) { 187 config.namespaces = (Boolean) value; 188 } 189 }); 190 191 /** 192 * True to include namespace declarations; false to discard them 193 * (unsupported). Even when namespace declarations are discarded, 194 * prefixes are retained. 195 * 196 * Has no effect if namespaces is false. 197 */ 198 PARAMETERS.put("namespace-declarations", new FixedParameter(true)); 199 200 /* 201 * True to fully normalize characters (unsupported). 202 */ 203 PARAMETERS.put("normalize-characters", new FixedParameter(false)); 204 205 /* 206 * A list of whitespace-separated URIs representing the schemas to validate 207 * against. Has no effect if schema-type is null. 208 */ 209 PARAMETERS.put("schema-location", new Parameter() { 210 public Object get(DOMConfigurationImpl config) { 211 return config.schemaLocation; 212 } 213 public void set(DOMConfigurationImpl config, Object value) { 214 config.schemaLocation = (String) value; 215 } 216 public boolean canSet(DOMConfigurationImpl config, Object value) { 217 return value == null || value instanceof String; 218 } 219 }); 220 221 /* 222 * URI representing the type of schema language, such as 223 * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml". 224 */ 225 PARAMETERS.put("schema-type", new Parameter() { 226 public Object get(DOMConfigurationImpl config) { 227 return config.schemaType; 228 } 229 public void set(DOMConfigurationImpl config, Object value) { 230 config.schemaType = (String) value; 231 } 232 public boolean canSet(DOMConfigurationImpl config, Object value) { 233 return value == null || value instanceof String; 234 } 235 }); 236 237 /* 238 * True to split CDATA sections containing "]]>"; false to signal an 239 * error instead. 240 */ 241 PARAMETERS.put("split-cdata-sections", new BooleanParameter() { 242 public Object get(DOMConfigurationImpl config) { 243 return config.splitCdataSections; 244 } 245 public void set(DOMConfigurationImpl config, Object value) { 246 config.splitCdataSections = (Boolean) value; 247 } 248 }); 249 250 /* 251 * True to require validation against a schema or DTD. Validation will 252 * recompute element content whitespace, ID and schema type data. 253 * 254 * Setting this unsets validate-if-schema. 255 */ 256 PARAMETERS.put("validate", new BooleanParameter() { 257 public Object get(DOMConfigurationImpl config) { 258 return config.validate; 259 } 260 public void set(DOMConfigurationImpl config, Object value) { 261 // validate-if-schema is always false 262 config.validate = (Boolean) value; 263 } 264 }); 265 266 /* 267 * True to validate if a schema was declared (unsupported). Setting this 268 * unsets validate. 269 */ 270 PARAMETERS.put("validate-if-schema", new FixedParameter(false)); 271 272 /* 273 * True to report invalid characters in node names, attributes, elements, 274 * comments, text, CDATA sections and processing instructions. 275 */ 276 PARAMETERS.put("well-formed", new BooleanParameter() { 277 public Object get(DOMConfigurationImpl config) { 278 return config.wellFormed; 279 } 280 public void set(DOMConfigurationImpl config, Object value) { 281 config.wellFormed = (Boolean) value; 282 } 283 }); 284 285 // TODO add "resource-resolver" property for use with LS feature... 286 } 287 288 private boolean cdataSections = true; 289 private boolean comments = true; 290 private boolean datatypeNormalization = false; 291 private boolean entities = true; 292 private DOMErrorHandler errorHandler; 293 private boolean namespaces = true; 294 private String schemaLocation; 295 private String schemaType; 296 private boolean splitCdataSections = true; 297 private boolean validate = false; 298 private boolean wellFormed = true; 299 300 interface Parameter { 301 Object get(DOMConfigurationImpl config); 302 void set(DOMConfigurationImpl config, Object value); 303 boolean canSet(DOMConfigurationImpl config, Object value); 304 } 305 306 static class FixedParameter implements Parameter { 307 final Object onlyValue; 308 FixedParameter(Object onlyValue) { 309 this.onlyValue = onlyValue; 310 } 311 public Object get(DOMConfigurationImpl config) { 312 return onlyValue; 313 } 314 public void set(DOMConfigurationImpl config, Object value) { 315 if (!onlyValue.equals(value)) { 316 throw new DOMException(DOMException.NOT_SUPPORTED_ERR, 317 "Unsupported value: " + value); 318 } 319 } 320 public boolean canSet(DOMConfigurationImpl config, Object value) { 321 return onlyValue.equals(value); 322 } 323 } 324 325 static abstract class BooleanParameter implements Parameter { 326 public boolean canSet(DOMConfigurationImpl config, Object value) { 327 return value instanceof Boolean; 328 } 329 } 330 331 public boolean canSetParameter(String name, Object value) { 332 Parameter parameter = PARAMETERS.get(name); 333 return parameter != null && parameter.canSet(this, value); 334 } 335 336 public void setParameter(String name, Object value) throws DOMException { 337 Parameter parameter = PARAMETERS.get(name); 338 if (parameter == null) { 339 throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name); 340 } 341 try { 342 parameter.set(this, value); 343 } catch (NullPointerException e) { 344 throw new DOMException(DOMException.TYPE_MISMATCH_ERR, 345 "Null not allowed for " + name); 346 } catch (ClassCastException e) { 347 throw new DOMException(DOMException.TYPE_MISMATCH_ERR, 348 "Invalid type for " + name + ": " + value.getClass()); 349 } 350 } 351 352 public Object getParameter(String name) throws DOMException { 353 Parameter parameter = PARAMETERS.get(name); 354 if (parameter == null) { 355 throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name); 356 } 357 return parameter.get(this); 358 } 359 360 public DOMStringList getParameterNames() { 361 final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]); 362 return new DOMStringList() { 363 public String item(int index) { 364 return index < result.length ? result[index] : null; 365 } 366 public int getLength() { 367 return result.length; 368 } 369 public boolean contains(String str) { 370 return PARAMETERS.containsKey(str); // case-insensitive. 371 } 372 }; 373 } 374 375 public void normalize(Node node) { 376 /* 377 * Since we don't validate, this code doesn't take into account the 378 * following "supported" parameters: datatype-normalization, entities, 379 * schema-location, schema-type, or validate. 380 * 381 * TODO: normalize namespaces 382 */ 383 384 switch (node.getNodeType()) { 385 case Node.CDATA_SECTION_NODE: 386 CDATASectionImpl cdata = (CDATASectionImpl) node; 387 if (cdataSections) { 388 if (cdata.needsSplitting()) { 389 if (splitCdataSections) { 390 cdata.split(); 391 report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted"); 392 } else { 393 report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); 394 } 395 } 396 checkTextValidity(cdata.buffer); 397 break; 398 } 399 node = cdata.replaceWithText(); 400 // fall through 401 402 case Node.TEXT_NODE: 403 TextImpl text = (TextImpl) node; 404 text = text.minimize(); 405 if (text != null) { 406 checkTextValidity(text.buffer); 407 } 408 break; 409 410 case Node.COMMENT_NODE: 411 CommentImpl comment = (CommentImpl) node; 412 if (!comments) { 413 comment.getParentNode().removeChild(comment); 414 break; 415 } 416 if (comment.containsDashDash()) { 417 report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); 418 } 419 checkTextValidity(comment.buffer); 420 break; 421 422 case Node.PROCESSING_INSTRUCTION_NODE: 423 checkTextValidity(((ProcessingInstructionImpl) node).getData()); 424 break; 425 426 case Node.ATTRIBUTE_NODE: 427 checkTextValidity(((AttrImpl) node).getValue()); 428 break; 429 430 case Node.ELEMENT_NODE: 431 ElementImpl element = (ElementImpl) node; 432 NamedNodeMap attributes = element.getAttributes(); 433 for (int i = 0; i < attributes.getLength(); i++) { 434 normalize(attributes.item(i)); 435 } 436 // fall through 437 438 case Node.DOCUMENT_NODE: 439 case Node.DOCUMENT_FRAGMENT_NODE: 440 Node next; 441 for (Node child = node.getFirstChild(); child != null; child = next) { 442 // lookup next eagerly because normalize() may remove its subject 443 next = child.getNextSibling(); 444 normalize(child); 445 } 446 break; 447 448 case Node.NOTATION_NODE: 449 case Node.DOCUMENT_TYPE_NODE: 450 case Node.ENTITY_NODE: 451 case Node.ENTITY_REFERENCE_NODE: 452 break; 453 454 default: 455 throw new DOMException(DOMException.NOT_SUPPORTED_ERR, 456 "Unsupported node type " + node.getNodeType()); 457 } 458 } 459 460 private void checkTextValidity(CharSequence s) { 461 if (wellFormed && !isValid(s)) { 462 report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); 463 } 464 } 465 466 /** 467 * Returns true if all of the characters in the text are permitted for use 468 * in XML documents. 469 */ 470 private boolean isValid(CharSequence text) { 471 for (int i = 0; i < text.length(); i++) { 472 char c = text.charAt(i); 473 // as defined by http://www.w3.org/TR/REC-xml/#charsets. 474 boolean valid = c == 0x9 || c == 0xA || c == 0xD 475 || (c >= 0x20 && c <= 0xd7ff) 476 || (c >= 0xe000 && c <= 0xfffd); 477 if (!valid) { 478 return false; 479 } 480 } 481 return true; 482 } 483 484 private void report(short severity, String type) { 485 if (errorHandler != null) { 486 // TODO: abort if handleError returns false 487 errorHandler.handleError(new DOMErrorImpl(severity, type, type)); 488 } 489 } 490} 491