DOMConfigurationImpl.java revision 6b811c5daec1b28e6f63b57f98a032236f2c3cf7
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package org.apache.harmony.xml.dom;
18
19import org.apache.xml.serializer.dom3.DOMErrorImpl;
20import org.w3c.dom.DOMConfiguration;
21import org.w3c.dom.DOMError;
22import org.w3c.dom.DOMErrorHandler;
23import org.w3c.dom.DOMException;
24import org.w3c.dom.DOMStringList;
25import org.w3c.dom.NamedNodeMap;
26import org.w3c.dom.Node;
27
28import java.util.Map;
29import java.util.TreeMap;
30
31/**
32 * A minimal implementation of DOMConfiguration. This implementation uses inner
33 * parameter instances to centralize each parameter's behaviour.
34 */
35public final class DOMConfigurationImpl implements DOMConfiguration {
36
37    private static final Map<String, Parameter> PARAMETERS
38            = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER);
39
40    static {
41        /*
42         * True to canonicalize the document (unsupported). This includes
43         * removing DocumentType nodes from the tree and removing unused
44         * namespace declarations. Setting this to true also sets these
45         * parameters:
46         *   entities = false
47         *   normalize-characters = false
48         *   cdata-sections = false
49         *   namespaces = true
50         *   namespace-declarations = true
51         *   well-formed = true
52         *   element-content-whitespace = true
53         * Setting these parameters to another value shall revert the canonical
54         * form to false.
55         */
56        PARAMETERS.put("canonical-form", new FixedParameter(false));
57
58        /*
59         * True to keep existing CDATA nodes; false to replace them/merge them
60         * into adjacent text nodes.
61         */
62        PARAMETERS.put("cdata-sections", new BooleanParameter() {
63            public Object get(DOMConfigurationImpl config) {
64                return config.cdataSections;
65            }
66            public void set(DOMConfigurationImpl config, Object value) {
67                config.cdataSections = (Boolean) value;
68            }
69        });
70
71        /*
72         * True to check character normalization (unsupported).
73         */
74        PARAMETERS.put("check-character-normalization", new FixedParameter(false));
75
76        /*
77         * True to keep comments in the document; false to discard them.
78         */
79        PARAMETERS.put("comments", new BooleanParameter() {
80            public Object get(DOMConfigurationImpl config) {
81                return config.comments;
82            }
83            public void set(DOMConfigurationImpl config, Object value) {
84                config.comments = (Boolean) value;
85            }
86        });
87
88        /*
89         * True to expose schema normalized values. Setting this to true sets
90         * the validate parameter to true. Has no effect when validate is false.
91         */
92        PARAMETERS.put("datatype-normalization", new BooleanParameter() {
93            public Object get(DOMConfigurationImpl config) {
94                return config.datatypeNormalization;
95            }
96            public void set(DOMConfigurationImpl config, Object value) {
97                if ((Boolean) value) {
98                    config.datatypeNormalization = true;
99                    config.validate = true;
100                } else {
101                    config.datatypeNormalization = false;
102                }
103            }
104        });
105
106        /*
107         * True to keep whitespace elements in the document; false to discard
108         * them (unsupported).
109         */
110        PARAMETERS.put("element-content-whitespace", new FixedParameter(true));
111
112        /*
113         * True to keep entity references in the document; false to expand them.
114         */
115        PARAMETERS.put("entities", new BooleanParameter() {
116            public Object get(DOMConfigurationImpl config) {
117                return config.entities;
118            }
119            public void set(DOMConfigurationImpl config, Object value) {
120                config.entities = (Boolean) value;
121            }
122        });
123
124        /*
125         * Handler to be invoked when errors are encountered.
126         */
127        PARAMETERS.put("error-handler", new Parameter() {
128            public Object get(DOMConfigurationImpl config) {
129                return config.errorHandler;
130            }
131            public void set(DOMConfigurationImpl config, Object value) {
132                config.errorHandler = (DOMErrorHandler) value;
133            }
134            public boolean canSet(DOMConfigurationImpl config, Object value) {
135                return value == null || value instanceof DOMErrorHandler;
136            }
137        });
138
139        /*
140         * Bulk alias to set the following parameter values:
141         *   validate-if-schema = false
142         *   entities = false
143         *   datatype-normalization = false
144         *   cdata-sections = false
145         *   namespace-declarations = true
146         *   well-formed = true
147         *   element-content-whitespace = true
148         *   comments = true
149         *   namespaces = true.
150         * Querying this returns true if all of the above parameters have the
151         * listed values; false otherwise.
152         */
153        PARAMETERS.put("infoset", new BooleanParameter() {
154            public Object get(DOMConfigurationImpl config) {
155                // validate-if-schema is always false
156                // element-content-whitespace is always true
157                // namespace-declarations is always true
158                return !config.entities
159                        && !config.datatypeNormalization
160                        && !config.cdataSections
161                        && config.wellFormed
162                        && config.comments
163                        && config.namespaces;
164            }
165            public void set(DOMConfigurationImpl config, Object value) {
166                if ((Boolean) value) {
167                    // validate-if-schema is always false
168                    // element-content-whitespace is always true
169                    // namespace-declarations is always true
170                    config.entities = false;
171                    config.datatypeNormalization = false;
172                    config.cdataSections = false;
173                    config.wellFormed = true;
174                    config.comments = true;
175                    config.namespaces = true;
176                }
177            }
178        });
179
180        /*
181         * True to perform namespace processing; false for none.
182         */
183        PARAMETERS.put("namespaces", new BooleanParameter() {
184            public Object get(DOMConfigurationImpl config) {
185                return config.namespaces;
186            }
187            public void set(DOMConfigurationImpl config, Object value) {
188                config.namespaces = (Boolean) value;
189            }
190        });
191
192        /**
193         * True to include namespace declarations; false to discard them
194         * (unsupported). Even when namespace declarations are discarded,
195         * prefixes are retained.
196         *
197         * Has no effect if namespaces is false.
198         */
199        PARAMETERS.put("namespace-declarations", new FixedParameter(true));
200
201        /*
202         * True to fully normalize characters (unsupported).
203         */
204        PARAMETERS.put("normalize-characters", new FixedParameter(false));
205
206        /*
207         * A list of whitespace-separated URIs representing the schemas to validate
208         * against. Has no effect if schema-type is null.
209         */
210        PARAMETERS.put("schema-location", new Parameter() {
211            public Object get(DOMConfigurationImpl config) {
212                return config.schemaLocation;
213            }
214            public void set(DOMConfigurationImpl config, Object value) {
215                config.schemaLocation = (String) value;
216            }
217            public boolean canSet(DOMConfigurationImpl config, Object value) {
218                return value == null || value instanceof String;
219            }
220        });
221
222        /*
223         * URI representing the type of schema language, such as
224         * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml".
225         */
226        PARAMETERS.put("schema-type", new Parameter() {
227            public Object get(DOMConfigurationImpl config) {
228                return config.schemaType;
229            }
230            public void set(DOMConfigurationImpl config, Object value) {
231                config.schemaType = (String) value;
232            }
233            public boolean canSet(DOMConfigurationImpl config, Object value) {
234                return value == null || value instanceof String;
235            }
236        });
237
238        /*
239         * True to split CDATA sections containing "]]>"; false to signal an
240         * error instead.
241         */
242        PARAMETERS.put("split-cdata-sections", new BooleanParameter() {
243            public Object get(DOMConfigurationImpl config) {
244                return config.splitCdataSections;
245            }
246            public void set(DOMConfigurationImpl config, Object value) {
247                config.splitCdataSections = (Boolean) value;
248            }
249        });
250
251        /*
252         * True to require validation against a schema or DTD. Validation will
253         * recompute element content whitespace, ID and schema type data.
254         *
255         * Setting this unsets validate-if-schema.
256         */
257        PARAMETERS.put("validate", new BooleanParameter() {
258            public Object get(DOMConfigurationImpl config) {
259                return config.validate;
260            }
261            public void set(DOMConfigurationImpl config, Object value) {
262                // validate-if-schema is always false
263                config.validate = (Boolean) value;
264            }
265        });
266
267        /*
268         * True to validate if a schema was declared (unsupported). Setting this
269         * unsets validate.
270         */
271        PARAMETERS.put("validate-if-schema", new FixedParameter(false));
272
273        /*
274         * True to report invalid characters in node names, attributes, elements,
275         * comments, text, CDATA sections and processing instructions.
276         */
277        PARAMETERS.put("well-formed", new BooleanParameter() {
278            public Object get(DOMConfigurationImpl config) {
279                return config.wellFormed;
280            }
281            public void set(DOMConfigurationImpl config, Object value) {
282                config.wellFormed = (Boolean) value;
283            }
284        });
285
286        // TODO add "resource-resolver" property for use with LS feature...
287    }
288
289    private boolean cdataSections = true;
290    private boolean comments = true;
291    private boolean datatypeNormalization = false;
292    private boolean entities = true;
293    private DOMErrorHandler errorHandler;
294    private boolean namespaces = true;
295    private String schemaLocation;
296    private String schemaType;
297    private boolean splitCdataSections = true;
298    private boolean validate = false;
299    private boolean wellFormed = true;
300
301    interface Parameter {
302        Object get(DOMConfigurationImpl config);
303        void set(DOMConfigurationImpl config, Object value);
304        boolean canSet(DOMConfigurationImpl config, Object value);
305    }
306
307    static class FixedParameter implements Parameter {
308        final Object onlyValue;
309        FixedParameter(Object onlyValue) {
310            this.onlyValue = onlyValue;
311        }
312        public Object get(DOMConfigurationImpl config) {
313            return onlyValue;
314        }
315        public void set(DOMConfigurationImpl config, Object value) {
316            if (!onlyValue.equals(value)) {
317                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
318                        "Unsupported value: " + value);
319            }
320        }
321        public boolean canSet(DOMConfigurationImpl config, Object value) {
322            return onlyValue.equals(value);
323        }
324    }
325
326    static abstract class BooleanParameter implements Parameter {
327        public boolean canSet(DOMConfigurationImpl config, Object value) {
328            return value instanceof Boolean;
329        }
330    }
331
332    public boolean canSetParameter(String name, Object value) {
333        Parameter parameter = PARAMETERS.get(name);
334        return parameter != null && parameter.canSet(this, value);
335    }
336
337    public void setParameter(String name, Object value) throws DOMException {
338        Parameter parameter = PARAMETERS.get(name);
339        if (parameter == null) {
340            throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
341        }
342        try {
343            parameter.set(this, value);
344        } catch (NullPointerException e) {
345            throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
346                    "Null not allowed for " + name);
347        } catch (ClassCastException e) {
348            throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
349                    "Invalid type for " + name + ": " + value.getClass());
350        }
351    }
352
353    public Object getParameter(String name) throws DOMException {
354        Parameter parameter = PARAMETERS.get(name);
355        if (parameter == null) {
356            throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
357        }
358        return parameter.get(this);
359    }
360
361    public DOMStringList getParameterNames() {
362        final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]);
363        return new DOMStringList() {
364            public String item(int index) {
365                return index < result.length ? result[index] : null;
366            }
367            public int getLength() {
368                return result.length;
369            }
370            public boolean contains(String str) {
371                return PARAMETERS.containsKey(str); // case-insensitive.
372            }
373        };
374    }
375
376    public void normalize(Node node) {
377        /*
378         * Since we don't validate, this code doesn't take into account the
379         * following "supported" parameters: datatype-normalization, entities,
380         * schema-location, schema-type, or validate.
381         *
382         * TODO: normalize namespaces
383         */
384
385        switch (node.getNodeType()) {
386            case Node.CDATA_SECTION_NODE:
387                CDATASectionImpl cdata = (CDATASectionImpl) node;
388                if (cdataSections) {
389                    if (cdata.needsSplitting()) {
390                        if (splitCdataSections) {
391                            cdata.split();
392                            report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
393                        } else {
394                            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
395                        }
396                    }
397                    checkTextValidity(cdata.buffer);
398                    break;
399                }
400                node = cdata.replaceWithText();
401                // fall through
402
403            case Node.TEXT_NODE:
404                TextImpl text = (TextImpl) node;
405                text = text.minimize();
406                if (text != null) {
407                    checkTextValidity(text.buffer);
408                }
409                break;
410
411            case Node.COMMENT_NODE:
412                CommentImpl comment = (CommentImpl) node;
413                if (!comments) {
414                    comment.getParentNode().removeChild(comment);
415                    break;
416                }
417                if (comment.containsDashDash()) {
418                    report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
419                }
420                checkTextValidity(comment.buffer);
421                break;
422
423            case Node.PROCESSING_INSTRUCTION_NODE:
424                checkTextValidity(((ProcessingInstructionImpl) node).getData());
425                break;
426
427            case Node.ATTRIBUTE_NODE:
428                checkTextValidity(((AttrImpl) node).getValue());
429                break;
430
431            case Node.ELEMENT_NODE:
432                ElementImpl element = (ElementImpl) node;
433                NamedNodeMap attributes = element.getAttributes();
434                for (int i = 0; i < attributes.getLength(); i++) {
435                    normalize(attributes.item(i));
436                }
437                // fall through
438
439            case Node.DOCUMENT_NODE:
440            case Node.DOCUMENT_FRAGMENT_NODE:
441                Node next;
442                for (Node child = node.getFirstChild(); child != null; child = next) {
443                    // lookup next eagerly because normalize() may remove its subject
444                    next = child.getNextSibling();
445                    normalize(child);
446                }
447                break;
448
449            case Node.NOTATION_NODE:
450            case Node.DOCUMENT_TYPE_NODE:
451            case Node.ENTITY_NODE:
452            case Node.ENTITY_REFERENCE_NODE:
453                break;
454
455            default:
456                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
457                        "Unsupported node type " + node.getNodeType());
458        }
459    }
460
461    private void checkTextValidity(CharSequence s) {
462        if (wellFormed && !isValid(s)) {
463            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
464        }
465    }
466
467    /**
468     * Returns true if all of the characters in the text are permitted for use
469     * in XML documents.
470     */
471    private boolean isValid(CharSequence text) {
472        for (int i = 0; i < text.length(); i++) {
473            char c = text.charAt(i);
474            // as defined by http://www.w3.org/TR/REC-xml/#charsets.
475            boolean valid = c == 0x9 || c == 0xA || c == 0xD
476                    || (c >= 0x20 && c <= 0xd7ff)
477                    || (c >= 0xe000 && c <= 0xfffd);
478            if (!valid) {
479                return false;
480            }
481        }
482        return true;
483    }
484
485    private void report(short severity, String type) {
486        if (errorHandler != null) {
487            // TODO: abort if handleError returns false
488            errorHandler.handleError(new DOMErrorImpl(severity, type, type));
489        }
490    }
491}
492