DOMConfigurationImpl.java revision 5501a3d4b3d7657c183ed5446fe67fa011fbf70b
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package org.apache.harmony.xml.dom;
18
19import java.util.Map;
20import java.util.TreeMap;
21import org.apache.xml.serializer.dom3.DOMErrorImpl;
22import org.w3c.dom.DOMConfiguration;
23import org.w3c.dom.DOMError;
24import org.w3c.dom.DOMErrorHandler;
25import org.w3c.dom.DOMException;
26import org.w3c.dom.DOMStringList;
27import org.w3c.dom.NamedNodeMap;
28import org.w3c.dom.Node;
29
/**
 * A minimal implementation of DOMConfiguration. This implementation uses inner
 * parameter instances to centralize each parameter's behavior.
 */
34public final class DOMConfigurationImpl implements DOMConfiguration {
35
36    private static final Map<String, Parameter> PARAMETERS
37            = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER);
38
39    static {
40        /*
41         * True to canonicalize the document (unsupported). This includes
42         * removing DocumentType nodes from the tree and removing unused
43         * namespace declarations. Setting this to true also sets these
44         * parameters:
45         *   entities = false
46         *   normalize-characters = false
47         *   cdata-sections = false
48         *   namespaces = true
49         *   namespace-declarations = true
50         *   well-formed = true
51         *   element-content-whitespace = true
52         * Setting these parameters to another value shall revert the canonical
53         * form to false.
54         */
55        PARAMETERS.put("canonical-form", new FixedParameter(false));
56
57        /*
58         * True to keep existing CDATA nodes; false to replace them/merge them
59         * into adjacent text nodes.
60         */
61        PARAMETERS.put("cdata-sections", new BooleanParameter() {
62            public Object get(DOMConfigurationImpl config) {
63                return config.cdataSections;
64            }
65            public void set(DOMConfigurationImpl config, Object value) {
66                config.cdataSections = (Boolean) value;
67            }
68        });
69
70        /*
71         * True to check character normalization (unsupported).
72         */
73        PARAMETERS.put("check-character-normalization", new FixedParameter(false));
74
75        /*
76         * True to keep comments in the document; false to discard them.
77         */
78        PARAMETERS.put("comments", new BooleanParameter() {
79            public Object get(DOMConfigurationImpl config) {
80                return config.comments;
81            }
82            public void set(DOMConfigurationImpl config, Object value) {
83                config.comments = (Boolean) value;
84            }
85        });
86
87        /*
88         * True to expose schema normalized values. Setting this to true sets
89         * the validate parameter to true. Has no effect when validate is false.
90         */
91        PARAMETERS.put("datatype-normalization", new BooleanParameter() {
92            public Object get(DOMConfigurationImpl config) {
93                return config.datatypeNormalization;
94            }
95            public void set(DOMConfigurationImpl config, Object value) {
96                if ((Boolean) value) {
97                    config.datatypeNormalization = true;
98                    config.validate = true;
99                } else {
100                    config.datatypeNormalization = false;
101                }
102            }
103        });
104
105        /*
106         * True to keep whitespace elements in the document; false to discard
107         * them (unsupported).
108         */
109        PARAMETERS.put("element-content-whitespace", new FixedParameter(true));
110
111        /*
112         * True to keep entity references in the document; false to expand them.
113         */
114        PARAMETERS.put("entities", new BooleanParameter() {
115            public Object get(DOMConfigurationImpl config) {
116                return config.entities;
117            }
118            public void set(DOMConfigurationImpl config, Object value) {
119                config.entities = (Boolean) value;
120            }
121        });
122
123        /*
124         * Handler to be invoked when errors are encountered.
125         */
126        PARAMETERS.put("error-handler", new Parameter() {
127            public Object get(DOMConfigurationImpl config) {
128                return config.errorHandler;
129            }
130            public void set(DOMConfigurationImpl config, Object value) {
131                config.errorHandler = (DOMErrorHandler) value;
132            }
133            public boolean canSet(DOMConfigurationImpl config, Object value) {
134                return value == null || value instanceof DOMErrorHandler;
135            }
136        });
137
138        /*
139         * Bulk alias to set the following parameter values:
140         *   validate-if-schema = false
141         *   entities = false
142         *   datatype-normalization = false
143         *   cdata-sections = false
144         *   namespace-declarations = true
145         *   well-formed = true
146         *   element-content-whitespace = true
147         *   comments = true
148         *   namespaces = true.
149         * Querying this returns true if all of the above parameters have the
150         * listed values; false otherwise.
151         */
152        PARAMETERS.put("infoset", new BooleanParameter() {
153            public Object get(DOMConfigurationImpl config) {
154                // validate-if-schema is always false
155                // element-content-whitespace is always true
156                // namespace-declarations is always true
157                return !config.entities
158                        && !config.datatypeNormalization
159                        && !config.cdataSections
160                        && config.wellFormed
161                        && config.comments
162                        && config.namespaces;
163            }
164            public void set(DOMConfigurationImpl config, Object value) {
165                if ((Boolean) value) {
166                    // validate-if-schema is always false
167                    // element-content-whitespace is always true
168                    // namespace-declarations is always true
169                    config.entities = false;
170                    config.datatypeNormalization = false;
171                    config.cdataSections = false;
172                    config.wellFormed = true;
173                    config.comments = true;
174                    config.namespaces = true;
175                }
176            }
177        });
178
179        /*
180         * True to perform namespace processing; false for none.
181         */
182        PARAMETERS.put("namespaces", new BooleanParameter() {
183            public Object get(DOMConfigurationImpl config) {
184                return config.namespaces;
185            }
186            public void set(DOMConfigurationImpl config, Object value) {
187                config.namespaces = (Boolean) value;
188            }
189        });
190
191        /**
192         * True to include namespace declarations; false to discard them
193         * (unsupported). Even when namespace declarations are discarded,
194         * prefixes are retained.
195         *
196         * Has no effect if namespaces is false.
197         */
198        PARAMETERS.put("namespace-declarations", new FixedParameter(true));
199
200        /*
201         * True to fully normalize characters (unsupported).
202         */
203        PARAMETERS.put("normalize-characters", new FixedParameter(false));
204
205        /*
206         * A list of whitespace-separated URIs representing the schemas to validate
207         * against. Has no effect if schema-type is null.
208         */
209        PARAMETERS.put("schema-location", new Parameter() {
210            public Object get(DOMConfigurationImpl config) {
211                return config.schemaLocation;
212            }
213            public void set(DOMConfigurationImpl config, Object value) {
214                config.schemaLocation = (String) value;
215            }
216            public boolean canSet(DOMConfigurationImpl config, Object value) {
217                return value == null || value instanceof String;
218            }
219        });
220
221        /*
222         * URI representing the type of schema language, such as
223         * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml".
224         */
225        PARAMETERS.put("schema-type", new Parameter() {
226            public Object get(DOMConfigurationImpl config) {
227                return config.schemaType;
228            }
229            public void set(DOMConfigurationImpl config, Object value) {
230                config.schemaType = (String) value;
231            }
232            public boolean canSet(DOMConfigurationImpl config, Object value) {
233                return value == null || value instanceof String;
234            }
235        });
236
237        /*
238         * True to split CDATA sections containing "]]>"; false to signal an
239         * error instead.
240         */
241        PARAMETERS.put("split-cdata-sections", new BooleanParameter() {
242            public Object get(DOMConfigurationImpl config) {
243                return config.splitCdataSections;
244            }
245            public void set(DOMConfigurationImpl config, Object value) {
246                config.splitCdataSections = (Boolean) value;
247            }
248        });
249
250        /*
251         * True to require validation against a schema or DTD. Validation will
252         * recompute element content whitespace, ID and schema type data.
253         *
254         * Setting this unsets validate-if-schema.
255         */
256        PARAMETERS.put("validate", new BooleanParameter() {
257            public Object get(DOMConfigurationImpl config) {
258                return config.validate;
259            }
260            public void set(DOMConfigurationImpl config, Object value) {
261                // validate-if-schema is always false
262                config.validate = (Boolean) value;
263            }
264        });
265
266        /*
267         * True to validate if a schema was declared (unsupported). Setting this
268         * unsets validate.
269         */
270        PARAMETERS.put("validate-if-schema", new FixedParameter(false));
271
272        /*
273         * True to report invalid characters in node names, attributes, elements,
274         * comments, text, CDATA sections and processing instructions.
275         */
276        PARAMETERS.put("well-formed", new BooleanParameter() {
277            public Object get(DOMConfigurationImpl config) {
278                return config.wellFormed;
279            }
280            public void set(DOMConfigurationImpl config, Object value) {
281                config.wellFormed = (Boolean) value;
282            }
283        });
284
285        // TODO add "resource-resolver" property for use with LS feature...
286    }
287
288    private boolean cdataSections = true;
289    private boolean comments = true;
290    private boolean datatypeNormalization = false;
291    private boolean entities = true;
292    private DOMErrorHandler errorHandler;
293    private boolean namespaces = true;
294    private String schemaLocation;
295    private String schemaType;
296    private boolean splitCdataSections = true;
297    private boolean validate = false;
298    private boolean wellFormed = true;
299
300    interface Parameter {
301        Object get(DOMConfigurationImpl config);
302        void set(DOMConfigurationImpl config, Object value);
303        boolean canSet(DOMConfigurationImpl config, Object value);
304    }
305
306    static class FixedParameter implements Parameter {
307        final Object onlyValue;
308        FixedParameter(Object onlyValue) {
309            this.onlyValue = onlyValue;
310        }
311        public Object get(DOMConfigurationImpl config) {
312            return onlyValue;
313        }
314        public void set(DOMConfigurationImpl config, Object value) {
315            if (!onlyValue.equals(value)) {
316                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
317                        "Unsupported value: " + value);
318            }
319        }
320        public boolean canSet(DOMConfigurationImpl config, Object value) {
321            return onlyValue.equals(value);
322        }
323    }
324
325    static abstract class BooleanParameter implements Parameter {
326        public boolean canSet(DOMConfigurationImpl config, Object value) {
327            return value instanceof Boolean;
328        }
329    }
330
331    public boolean canSetParameter(String name, Object value) {
332        Parameter parameter = PARAMETERS.get(name);
333        return parameter != null && parameter.canSet(this, value);
334    }
335
336    public void setParameter(String name, Object value) throws DOMException {
337        Parameter parameter = PARAMETERS.get(name);
338        if (parameter == null) {
339            throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
340        }
341        try {
342            parameter.set(this, value);
343        } catch (NullPointerException e) {
344            throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
345                    "Null not allowed for " + name);
346        } catch (ClassCastException e) {
347            throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
348                    "Invalid type for " + name + ": " + value.getClass());
349        }
350    }
351
352    public Object getParameter(String name) throws DOMException {
353        Parameter parameter = PARAMETERS.get(name);
354        if (parameter == null) {
355            throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
356        }
357        return parameter.get(this);
358    }
359
360    public DOMStringList getParameterNames() {
361        final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]);
362        return new DOMStringList() {
363            public String item(int index) {
364                return index < result.length ? result[index] : null;
365            }
366            public int getLength() {
367                return result.length;
368            }
369            public boolean contains(String str) {
370                return PARAMETERS.containsKey(str); // case-insensitive.
371            }
372        };
373    }
374
375    public void normalize(Node node) {
376        /*
377         * Since we don't validate, this code doesn't take into account the
378         * following "supported" parameters: datatype-normalization, entities,
379         * schema-location, schema-type, or validate.
380         *
381         * TODO: normalize namespaces
382         */
383
384        switch (node.getNodeType()) {
385            case Node.CDATA_SECTION_NODE:
386                CDATASectionImpl cdata = (CDATASectionImpl) node;
387                if (cdataSections) {
388                    if (cdata.needsSplitting()) {
389                        if (splitCdataSections) {
390                            cdata.split();
391                            report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
392                        } else {
393                            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
394                        }
395                    }
396                    checkTextValidity(cdata.buffer);
397                    break;
398                }
399                node = cdata.replaceWithText();
400                // fall through
401
402            case Node.TEXT_NODE:
403                TextImpl text = (TextImpl) node;
404                text = text.minimize();
405                if (text != null) {
406                    checkTextValidity(text.buffer);
407                }
408                break;
409
410            case Node.COMMENT_NODE:
411                CommentImpl comment = (CommentImpl) node;
412                if (!comments) {
413                    comment.getParentNode().removeChild(comment);
414                    break;
415                }
416                if (comment.containsDashDash()) {
417                    report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
418                }
419                checkTextValidity(comment.buffer);
420                break;
421
422            case Node.PROCESSING_INSTRUCTION_NODE:
423                checkTextValidity(((ProcessingInstructionImpl) node).getData());
424                break;
425
426            case Node.ATTRIBUTE_NODE:
427                checkTextValidity(((AttrImpl) node).getValue());
428                break;
429
430            case Node.ELEMENT_NODE:
431                ElementImpl element = (ElementImpl) node;
432                NamedNodeMap attributes = element.getAttributes();
433                for (int i = 0; i < attributes.getLength(); i++) {
434                    normalize(attributes.item(i));
435                }
436                // fall through
437
438            case Node.DOCUMENT_NODE:
439            case Node.DOCUMENT_FRAGMENT_NODE:
440                Node next;
441                for (Node child = node.getFirstChild(); child != null; child = next) {
442                    // lookup next eagerly because normalize() may remove its subject
443                    next = child.getNextSibling();
444                    normalize(child);
445                }
446                break;
447
448            case Node.NOTATION_NODE:
449            case Node.DOCUMENT_TYPE_NODE:
450            case Node.ENTITY_NODE:
451            case Node.ENTITY_REFERENCE_NODE:
452                break;
453
454            default:
455                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
456                        "Unsupported node type " + node.getNodeType());
457        }
458    }
459
460    private void checkTextValidity(CharSequence s) {
461        if (wellFormed && !isValid(s)) {
462            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
463        }
464    }
465
466    /**
467     * Returns true if all of the characters in the text are permitted for use
468     * in XML documents.
469     */
470    private boolean isValid(CharSequence text) {
471        for (int i = 0; i < text.length(); i++) {
472            char c = text.charAt(i);
473            // as defined by http://www.w3.org/TR/REC-xml/#charsets.
474            boolean valid = c == 0x9 || c == 0xA || c == 0xD
475                    || (c >= 0x20 && c <= 0xd7ff)
476                    || (c >= 0xe000 && c <= 0xfffd);
477            if (!valid) {
478                return false;
479            }
480        }
481        return true;
482    }
483
484    private void report(short severity, String type) {
485        if (errorHandler != null) {
486            // TODO: abort if handleError returns false
487            errorHandler.handleError(new DOMErrorImpl(severity, type, type));
488        }
489    }
490}
491