1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.harmony.xml.dom;
18
19import java.util.Map;
20import java.util.TreeMap;
21import org.w3c.dom.DOMConfiguration;
22import org.w3c.dom.DOMError;
23import org.w3c.dom.DOMErrorHandler;
24import org.w3c.dom.DOMException;
25import org.w3c.dom.DOMStringList;
26import org.w3c.dom.NamedNodeMap;
27import org.w3c.dom.Node;
28
29/**
30 * A minimal implementation of DOMConfiguration. This implementation uses inner
31 * parameter instances to centralize each parameter's behavior.
32 */
33public final class DOMConfigurationImpl implements DOMConfiguration {
34
35    private static final Map<String, Parameter> PARAMETERS
36            = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER);
37
38    static {
39        /*
40         * True to canonicalize the document (unsupported). This includes
41         * removing DocumentType nodes from the tree and removing unused
42         * namespace declarations. Setting this to true also sets these
43         * parameters:
44         *   entities = false
45         *   normalize-characters = false
46         *   cdata-sections = false
47         *   namespaces = true
48         *   namespace-declarations = true
49         *   well-formed = true
50         *   element-content-whitespace = true
51         * Setting these parameters to another value shall revert the canonical
52         * form to false.
53         */
54        PARAMETERS.put("canonical-form", new FixedParameter(false));
55
56        /*
57         * True to keep existing CDATA nodes; false to replace them/merge them
58         * into adjacent text nodes.
59         */
60        PARAMETERS.put("cdata-sections", new BooleanParameter() {
61            public Object get(DOMConfigurationImpl config) {
62                return config.cdataSections;
63            }
64            public void set(DOMConfigurationImpl config, Object value) {
65                config.cdataSections = (Boolean) value;
66            }
67        });
68
69        /*
70         * True to check character normalization (unsupported).
71         */
72        PARAMETERS.put("check-character-normalization", new FixedParameter(false));
73
74        /*
75         * True to keep comments in the document; false to discard them.
76         */
77        PARAMETERS.put("comments", new BooleanParameter() {
78            public Object get(DOMConfigurationImpl config) {
79                return config.comments;
80            }
81            public void set(DOMConfigurationImpl config, Object value) {
82                config.comments = (Boolean) value;
83            }
84        });
85
86        /*
87         * True to expose schema normalized values. Setting this to true sets
88         * the validate parameter to true. Has no effect when validate is false.
89         */
90        PARAMETERS.put("datatype-normalization", new BooleanParameter() {
91            public Object get(DOMConfigurationImpl config) {
92                return config.datatypeNormalization;
93            }
94            public void set(DOMConfigurationImpl config, Object value) {
95                if ((Boolean) value) {
96                    config.datatypeNormalization = true;
97                    config.validate = true;
98                } else {
99                    config.datatypeNormalization = false;
100                }
101            }
102        });
103
104        /*
105         * True to keep whitespace elements in the document; false to discard
106         * them (unsupported).
107         */
108        PARAMETERS.put("element-content-whitespace", new FixedParameter(true));
109
110        /*
111         * True to keep entity references in the document; false to expand them.
112         */
113        PARAMETERS.put("entities", new BooleanParameter() {
114            public Object get(DOMConfigurationImpl config) {
115                return config.entities;
116            }
117            public void set(DOMConfigurationImpl config, Object value) {
118                config.entities = (Boolean) value;
119            }
120        });
121
122        /*
123         * Handler to be invoked when errors are encountered.
124         */
125        PARAMETERS.put("error-handler", new Parameter() {
126            public Object get(DOMConfigurationImpl config) {
127                return config.errorHandler;
128            }
129            public void set(DOMConfigurationImpl config, Object value) {
130                config.errorHandler = (DOMErrorHandler) value;
131            }
132            public boolean canSet(DOMConfigurationImpl config, Object value) {
133                return value == null || value instanceof DOMErrorHandler;
134            }
135        });
136
137        /*
138         * Bulk alias to set the following parameter values:
139         *   validate-if-schema = false
140         *   entities = false
141         *   datatype-normalization = false
142         *   cdata-sections = false
143         *   namespace-declarations = true
144         *   well-formed = true
145         *   element-content-whitespace = true
146         *   comments = true
147         *   namespaces = true.
148         * Querying this returns true if all of the above parameters have the
149         * listed values; false otherwise.
150         */
151        PARAMETERS.put("infoset", new BooleanParameter() {
152            public Object get(DOMConfigurationImpl config) {
153                // validate-if-schema is always false
154                // element-content-whitespace is always true
155                // namespace-declarations is always true
156                return !config.entities
157                        && !config.datatypeNormalization
158                        && !config.cdataSections
159                        && config.wellFormed
160                        && config.comments
161                        && config.namespaces;
162            }
163            public void set(DOMConfigurationImpl config, Object value) {
164                if ((Boolean) value) {
165                    // validate-if-schema is always false
166                    // element-content-whitespace is always true
167                    // namespace-declarations is always true
168                    config.entities = false;
169                    config.datatypeNormalization = false;
170                    config.cdataSections = false;
171                    config.wellFormed = true;
172                    config.comments = true;
173                    config.namespaces = true;
174                }
175            }
176        });
177
178        /*
179         * True to perform namespace processing; false for none.
180         */
181        PARAMETERS.put("namespaces", new BooleanParameter() {
182            public Object get(DOMConfigurationImpl config) {
183                return config.namespaces;
184            }
185            public void set(DOMConfigurationImpl config, Object value) {
186                config.namespaces = (Boolean) value;
187            }
188        });
189
190        /**
191         * True to include namespace declarations; false to discard them
192         * (unsupported). Even when namespace declarations are discarded,
193         * prefixes are retained.
194         *
195         * Has no effect if namespaces is false.
196         */
197        PARAMETERS.put("namespace-declarations", new FixedParameter(true));
198
199        /*
200         * True to fully normalize characters (unsupported).
201         */
202        PARAMETERS.put("normalize-characters", new FixedParameter(false));
203
204        /*
205         * A list of whitespace-separated URIs representing the schemas to validate
206         * against. Has no effect if schema-type is null.
207         */
208        PARAMETERS.put("schema-location", new Parameter() {
209            public Object get(DOMConfigurationImpl config) {
210                return config.schemaLocation;
211            }
212            public void set(DOMConfigurationImpl config, Object value) {
213                config.schemaLocation = (String) value;
214            }
215            public boolean canSet(DOMConfigurationImpl config, Object value) {
216                return value == null || value instanceof String;
217            }
218        });
219
220        /*
221         * URI representing the type of schema language, such as
222         * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml".
223         */
224        PARAMETERS.put("schema-type", new Parameter() {
225            public Object get(DOMConfigurationImpl config) {
226                return config.schemaType;
227            }
228            public void set(DOMConfigurationImpl config, Object value) {
229                config.schemaType = (String) value;
230            }
231            public boolean canSet(DOMConfigurationImpl config, Object value) {
232                return value == null || value instanceof String;
233            }
234        });
235
236        /*
237         * True to split CDATA sections containing "]]>"; false to signal an
238         * error instead.
239         */
240        PARAMETERS.put("split-cdata-sections", new BooleanParameter() {
241            public Object get(DOMConfigurationImpl config) {
242                return config.splitCdataSections;
243            }
244            public void set(DOMConfigurationImpl config, Object value) {
245                config.splitCdataSections = (Boolean) value;
246            }
247        });
248
249        /*
250         * True to require validation against a schema or DTD. Validation will
251         * recompute element content whitespace, ID and schema type data.
252         *
253         * Setting this unsets validate-if-schema.
254         */
255        PARAMETERS.put("validate", new BooleanParameter() {
256            public Object get(DOMConfigurationImpl config) {
257                return config.validate;
258            }
259            public void set(DOMConfigurationImpl config, Object value) {
260                // validate-if-schema is always false
261                config.validate = (Boolean) value;
262            }
263        });
264
265        /*
266         * True to validate if a schema was declared (unsupported). Setting this
267         * unsets validate.
268         */
269        PARAMETERS.put("validate-if-schema", new FixedParameter(false));
270
271        /*
272         * True to report invalid characters in node names, attributes, elements,
273         * comments, text, CDATA sections and processing instructions.
274         */
275        PARAMETERS.put("well-formed", new BooleanParameter() {
276            public Object get(DOMConfigurationImpl config) {
277                return config.wellFormed;
278            }
279            public void set(DOMConfigurationImpl config, Object value) {
280                config.wellFormed = (Boolean) value;
281            }
282        });
283
284        // TODO add "resource-resolver" property for use with LS feature...
285    }
286
287    private boolean cdataSections = true;
288    private boolean comments = true;
289    private boolean datatypeNormalization = false;
290    private boolean entities = true;
291    private DOMErrorHandler errorHandler;
292    private boolean namespaces = true;
293    private String schemaLocation;
294    private String schemaType;
295    private boolean splitCdataSections = true;
296    private boolean validate = false;
297    private boolean wellFormed = true;
298
299    interface Parameter {
300        Object get(DOMConfigurationImpl config);
301        void set(DOMConfigurationImpl config, Object value);
302        boolean canSet(DOMConfigurationImpl config, Object value);
303    }
304
305    static class FixedParameter implements Parameter {
306        final Object onlyValue;
307        FixedParameter(Object onlyValue) {
308            this.onlyValue = onlyValue;
309        }
310        public Object get(DOMConfigurationImpl config) {
311            return onlyValue;
312        }
313        public void set(DOMConfigurationImpl config, Object value) {
314            if (!onlyValue.equals(value)) {
315                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
316                        "Unsupported value: " + value);
317            }
318        }
319        public boolean canSet(DOMConfigurationImpl config, Object value) {
320            return onlyValue.equals(value);
321        }
322    }
323
324    static abstract class BooleanParameter implements Parameter {
325        public boolean canSet(DOMConfigurationImpl config, Object value) {
326            return value instanceof Boolean;
327        }
328    }
329
330    public boolean canSetParameter(String name, Object value) {
331        Parameter parameter = PARAMETERS.get(name);
332        return parameter != null && parameter.canSet(this, value);
333    }
334
335    public void setParameter(String name, Object value) throws DOMException {
336        Parameter parameter = PARAMETERS.get(name);
337        if (parameter == null) {
338            throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
339        }
340        try {
341            parameter.set(this, value);
342        } catch (NullPointerException e) {
343            throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
344                    "Null not allowed for " + name);
345        } catch (ClassCastException e) {
346            throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
347                    "Invalid type for " + name + ": " + value.getClass());
348        }
349    }
350
351    public Object getParameter(String name) throws DOMException {
352        Parameter parameter = PARAMETERS.get(name);
353        if (parameter == null) {
354            throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
355        }
356        return parameter.get(this);
357    }
358
359    public DOMStringList getParameterNames() {
360        final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]);
361        return new DOMStringList() {
362            public String item(int index) {
363                return index < result.length ? result[index] : null;
364            }
365            public int getLength() {
366                return result.length;
367            }
368            public boolean contains(String str) {
369                return PARAMETERS.containsKey(str); // case-insensitive.
370            }
371        };
372    }
373
374    public void normalize(Node node) {
375        /*
376         * Since we don't validate, this code doesn't take into account the
377         * following "supported" parameters: datatype-normalization, entities,
378         * schema-location, schema-type, or validate.
379         *
380         * TODO: normalize namespaces
381         */
382
383        switch (node.getNodeType()) {
384            case Node.CDATA_SECTION_NODE:
385                CDATASectionImpl cdata = (CDATASectionImpl) node;
386                if (cdataSections) {
387                    if (cdata.needsSplitting()) {
388                        if (splitCdataSections) {
389                            cdata.split();
390                            report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
391                        } else {
392                            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
393                        }
394                    }
395                    checkTextValidity(cdata.buffer);
396                    break;
397                }
398                node = cdata.replaceWithText();
399                // fall through
400
401            case Node.TEXT_NODE:
402                TextImpl text = (TextImpl) node;
403                text = text.minimize();
404                if (text != null) {
405                    checkTextValidity(text.buffer);
406                }
407                break;
408
409            case Node.COMMENT_NODE:
410                CommentImpl comment = (CommentImpl) node;
411                if (!comments) {
412                    comment.getParentNode().removeChild(comment);
413                    break;
414                }
415                if (comment.containsDashDash()) {
416                    report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
417                }
418                checkTextValidity(comment.buffer);
419                break;
420
421            case Node.PROCESSING_INSTRUCTION_NODE:
422                checkTextValidity(((ProcessingInstructionImpl) node).getData());
423                break;
424
425            case Node.ATTRIBUTE_NODE:
426                checkTextValidity(((AttrImpl) node).getValue());
427                break;
428
429            case Node.ELEMENT_NODE:
430                ElementImpl element = (ElementImpl) node;
431                NamedNodeMap attributes = element.getAttributes();
432                for (int i = 0; i < attributes.getLength(); i++) {
433                    normalize(attributes.item(i));
434                }
435                // fall through
436
437            case Node.DOCUMENT_NODE:
438            case Node.DOCUMENT_FRAGMENT_NODE:
439                Node next;
440                for (Node child = node.getFirstChild(); child != null; child = next) {
441                    // lookup next eagerly because normalize() may remove its subject
442                    next = child.getNextSibling();
443                    normalize(child);
444                }
445                break;
446
447            case Node.NOTATION_NODE:
448            case Node.DOCUMENT_TYPE_NODE:
449            case Node.ENTITY_NODE:
450            case Node.ENTITY_REFERENCE_NODE:
451                break;
452
453            default:
454                throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
455                        "Unsupported node type " + node.getNodeType());
456        }
457    }
458
459    private void checkTextValidity(CharSequence s) {
460        if (wellFormed && !isValid(s)) {
461            report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
462        }
463    }
464
465    /**
466     * Returns true if all of the characters in the text are permitted for use
467     * in XML documents.
468     */
469    private boolean isValid(CharSequence text) {
470        for (int i = 0; i < text.length(); i++) {
471            char c = text.charAt(i);
472            // as defined by http://www.w3.org/TR/REC-xml/#charsets.
473            boolean valid = c == 0x9 || c == 0xA || c == 0xD
474                    || (c >= 0x20 && c <= 0xd7ff)
475                    || (c >= 0xe000 && c <= 0xfffd);
476            if (!valid) {
477                return false;
478            }
479        }
480        return true;
481    }
482
483    private void report(short severity, String type) {
484        if (errorHandler != null) {
485            // TODO: abort if handleError returns false
486            errorHandler.handleError(new DOMErrorImpl(severity, type));
487        }
488    }
489}
490