1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package libcore.xml;
18
import junit.framework.TestCase;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.DOMConfiguration;
import org.w3c.dom.DOMError;
import org.w3c.dom.DOMErrorHandler;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
47
48/**
49 * Tests the acceptance of various parameters on the DOM configuration. This
50 * test assumes the same set of parameters as the RI version 1.5. Perfectly
51 * correct DOM implementations may fail this test because it assumes certain
52 * parameters will be unsupported.
53 */
54public class NormalizeTest extends TestCase {
55
56    private Document document;
57    private DOMConfiguration domConfiguration;
58
59    String[] infosetImpliesFalse = {
60            "validate-if-schema", "entities", "datatype-normalization", "cdata-sections" };
61    String[] infosetImpliesTrue = { "namespace-declarations", "well-formed",
62            "element-content-whitespace", "comments", "namespaces" };
63
64    @Override protected void setUp() throws Exception {
65        document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
66        domConfiguration = document.getDomConfig();
67    }
68
69    public void testCanonicalForm() {
70        assertEquals(false, domConfiguration.getParameter("canonical-form"));
71        assertSupported("canonical-form", false);
72        assertUnsupported("canonical-form", true);
73    }
74
75    public void testCdataSections() {
76        assertEquals(true, domConfiguration.getParameter("cdata-sections"));
77        assertSupported("cdata-sections", false);
78        assertSupported("cdata-sections", true);
79    }
80
81    public void testCheckCharacterNormalization() {
82        assertEquals(false, domConfiguration.getParameter("check-character-normalization"));
83        assertSupported("check-character-normalization", false);
84        assertUnsupported("check-character-normalization", true);
85    }
86
87    public void testComments() {
88        assertEquals(true, domConfiguration.getParameter("comments"));
89        assertSupported("comments", false);
90        assertSupported("comments", true);
91    }
92
93    public void testDatatypeNormalization() {
94        assertEquals(false, domConfiguration.getParameter("datatype-normalization"));
95        assertSupported("datatype-normalization", false);
96        assertSupported("datatype-normalization", true);
97
98        // setting this parameter to true should set validate to true...
99        domConfiguration.setParameter("validate", false);
100        domConfiguration.setParameter("datatype-normalization", true);
101        assertEquals(true, domConfiguration.getParameter("validate"));
102
103        // ...but the negative case isn't so
104        domConfiguration.setParameter("datatype-normalization", false);
105        assertEquals(true, domConfiguration.getParameter("validate"));
106    }
107
108    public void testElementContentWhitespace() {
109        assertEquals(true, domConfiguration.getParameter("element-content-whitespace"));
110        assertUnsupported("element-content-whitespace", false);
111        assertSupported("element-content-whitespace", true);
112    }
113
114    public void testEntities() {
115        assertEquals(true, domConfiguration.getParameter("entities"));
116        assertSupported("entities", false);
117        assertSupported("entities", true);
118    }
119
120    public void testErrorHandler() {
121        assertEquals(null, domConfiguration.getParameter("error-handler"));
122        assertSupported("error-handler", null);
123        assertSupported("error-handler", new DOMErrorHandler() {
124            public boolean handleError(DOMError error) {
125                return true;
126            }
127        });
128    }
129
130    public void testInfoset() {
131        assertEquals(false, domConfiguration.getParameter("infoset"));
132        assertSupported("infoset", false);
133        assertSupported("infoset", true);
134    }
135
136    public void testSettingInfosetUpdatesImplied() {
137        // first clear those other parameters
138        for (String name : infosetImpliesFalse) {
139            if (domConfiguration.canSetParameter(name, true)) {
140                domConfiguration.setParameter(name, true);
141            }
142        }
143        for (String name : infosetImpliesTrue) {
144            if (domConfiguration.canSetParameter(name, false)) {
145                domConfiguration.setParameter(name, false);
146            }
147        }
148
149        // set infoset
150        domConfiguration.setParameter("infoset", true);
151
152        // now the parameters should all match what infoset implies
153        for (String name : infosetImpliesFalse) {
154            assertEquals(false, domConfiguration.getParameter(name));
155        }
156        for (String name : infosetImpliesTrue) {
157            assertEquals(true, domConfiguration.getParameter(name));
158        }
159    }
160
161    public void testSettingImpliedUpdatesInfoset() {
162        for (String name : infosetImpliesFalse) {
163            domConfiguration.setParameter("infoset", true);
164            if (domConfiguration.canSetParameter(name, true)) {
165                domConfiguration.setParameter(name, true);
166                assertEquals(false, domConfiguration.getParameter("infoset"));
167            }
168        }
169
170        for (String name : infosetImpliesTrue) {
171            domConfiguration.setParameter("infoset", true);
172            if (domConfiguration.canSetParameter(name, false)) {
173                domConfiguration.setParameter(name, false);
174                assertEquals(false, domConfiguration.getParameter("infoset"));
175            }
176        }
177    }
178
179    public void testNamespaces() {
180        assertEquals(true, domConfiguration.getParameter("namespaces"));
181        assertSupported("namespaces", false);
182        assertSupported("namespaces", true);
183    }
184
185    public void testNamespaceDeclarations() {
186        assertEquals(true, domConfiguration.getParameter("namespace-declarations"));
187        assertUnsupported("namespace-declarations", false); // supported in RI 6
188        assertSupported("namespace-declarations", true);
189    }
190
191    public void testNormalizeCharacters() {
192        assertEquals(false, domConfiguration.getParameter("normalize-characters"));
193        assertSupported("normalize-characters", false);
194        assertUnsupported("normalize-characters", true);
195    }
196
197    public void testSchemaLocation() {
198        assertEquals(null, domConfiguration.getParameter("schema-location"));
199        assertSupported("schema-location", "http://foo");
200        assertSupported("schema-location", null);
201    }
202
203    /**
204     * This fails under the RI because setParameter() succeeds even though
205     * canSetParameter() returns false.
206     */
207    public void testSchemaTypeDtd() {
208        assertUnsupported("schema-type", "http://www.w3.org/TR/REC-xml"); // supported in RI v6
209    }
210
211    public void testSchemaTypeXmlSchema() {
212        assertEquals(null, domConfiguration.getParameter("schema-type"));
213        assertSupported("schema-type", null);
214        assertSupported("schema-type", "http://www.w3.org/2001/XMLSchema");
215    }
216
217    public void testSplitCdataSections() {
218        assertEquals(true, domConfiguration.getParameter("split-cdata-sections"));
219        assertSupported("split-cdata-sections", false);
220        assertSupported("split-cdata-sections", true);
221    }
222
223    public void testValidate() {
224        assertEquals(false, domConfiguration.getParameter("validate"));
225        assertSupported("validate", false);
226        assertSupported("validate", true);
227    }
228
229    public void testValidateIfSchema() {
230        assertEquals(false, domConfiguration.getParameter("validate-if-schema"));
231        assertSupported("validate-if-schema", false);
232        assertUnsupported("validate-if-schema", true);
233    }
234
235    public void testWellFormed() {
236        assertEquals(true, domConfiguration.getParameter("well-formed"));
237        assertSupported("well-formed", false);
238        assertSupported("well-formed", true);
239    }
240
241    public void testMissingParameter() {
242        assertFalse(domConfiguration.canSetParameter("foo", true));
243        try {
244            domConfiguration.getParameter("foo");
245            fail();
246        } catch (DOMException e) {
247        }
248        try {
249            domConfiguration.setParameter("foo", true);
250            fail();
251        } catch (DOMException e) {
252        }
253    }
254
255    public void testNullKey() {
256        try {
257            domConfiguration.canSetParameter(null, true);
258            fail();
259        } catch (NullPointerException e) {
260        }
261        try {
262            domConfiguration.getParameter(null);
263            fail();
264        } catch (NullPointerException e) {
265        }
266        try {
267            domConfiguration.setParameter(null, true);
268            fail();
269        } catch (NullPointerException e) {
270        }
271    }
272
273    public void testNullValue() {
274        String message = "This implementation's canSetParameter() disagrees"
275                + " with its setParameter()";
276        try {
277            domConfiguration.setParameter("well-formed", null);
278            fail(message);
279        } catch (DOMException e) {
280        }
281        assertEquals(message, false, domConfiguration.canSetParameter("well-formed", null));
282    }
283
284    public void testTypeMismatch() {
285        assertEquals(false, domConfiguration.canSetParameter("well-formed", "true"));
286        try {
287            domConfiguration.setParameter("well-formed", "true");
288            fail();
289        } catch (DOMException e) {
290        }
291
292        assertEquals(false, domConfiguration.canSetParameter("well-formed", new Object()));
293        try {
294            domConfiguration.setParameter("well-formed", new Object());
295            fail();
296        } catch (DOMException e) {
297        }
298    }
299
300    private void assertUnsupported(String name, Object value) {
301        String message = "This implementation's setParameter() supports an unexpected value: "
302                + name + "=" + value;
303        assertFalse(message, domConfiguration.canSetParameter(name, value));
304        try {
305            domConfiguration.setParameter(name, value);
306            fail(message);
307        } catch (DOMException e) {
308            assertEquals(DOMException.NOT_SUPPORTED_ERR, e.code);
309        }
310        try {
311            domConfiguration.setParameter(name.toUpperCase(), value);
312            fail(message);
313        } catch (DOMException e) {
314            assertEquals(DOMException.NOT_SUPPORTED_ERR, e.code);
315        }
316        assertFalse(value.equals(domConfiguration.getParameter(name)));
317    }
318
319    private void assertSupported(String name, Object value) {
320        String message = "This implementation's canSetParameter() disagrees"
321                + " with its setParameter() for " + name + "=" + value;
322        try {
323            domConfiguration.setParameter(name, value);
324        } catch (DOMException e) {
325            if (domConfiguration.canSetParameter(name, value)) {
326                fail(message);
327            } else {
328                fail("This implementation's setParameter() doesn't support: "
329                        + name + "=" + value);
330            }
331        }
332        assertTrue(message, domConfiguration.canSetParameter(name.toUpperCase(), value));
333        assertTrue(message, domConfiguration.canSetParameter(name, value));
334        assertEquals(value, domConfiguration.getParameter(name));
335        domConfiguration.setParameter(name.toUpperCase(), value);
336        assertEquals(value, domConfiguration.getParameter(name.toUpperCase()));
337    }
338
339    public void testCdataSectionsNotHonoredByNodeNormalize() throws Exception {
340        String xml = "<foo>ABC<![CDATA[DEF]]>GHI</foo>";
341        parse(xml);
342        domConfiguration.setParameter("cdata-sections", true);
343        document.getDocumentElement().normalize();
344        assertEquals(xml, domToString(document));
345
346        parse(xml);
347        domConfiguration.setParameter("cdata-sections", false);
348        document.getDocumentElement().normalize();
349        assertEquals(xml, domToString(document));
350    }
351
352    public void testCdataSectionsHonoredByDocumentNormalize() throws Exception {
353        String xml = "<foo>ABC<![CDATA[DEF]]>GHI</foo>";
354        parse(xml);
355        domConfiguration.setParameter("cdata-sections", true);
356        document.normalizeDocument();
357        assertEquals(xml, domToString(document));
358
359        parse(xml);
360        domConfiguration.setParameter("cdata-sections", false);
361        document.normalizeDocument();
362        String expected = xml.replace("<![CDATA[DEF]]>", "DEF");
363        assertEquals(expected, domToString(document));
364    }
365
366    public void testMergeAdjacentTextNodes() throws Exception {
367        document = createDocumentWithAdjacentTexts("abc", "def");
368        document.getDocumentElement().normalize();
369        assertChildren(document.getDocumentElement(), "abcdef");
370    }
371
372    public void testMergeAdjacentEmptyTextNodes() throws Exception {
373        document = createDocumentWithAdjacentTexts("", "", "");
374        document.getDocumentElement().normalize();
375        assertChildren(document.getDocumentElement());
376    }
377
378    public void testMergeAdjacentNodesWithNonTextSiblings() throws Exception {
379        document = createDocumentWithAdjacentTexts("abc", "def", "<br>", "ghi", "jkl");
380        document.getDocumentElement().normalize();
381        assertChildren(document.getDocumentElement(), "abcdef", "<br>", "ghijkl");
382    }
383
384    public void testMergeAdjacentNodesEliminatesEmptyTexts() throws Exception {
385        document = createDocumentWithAdjacentTexts("", "", "<br>", "", "", "<br>", "", "<br>", "");
386        document.getDocumentElement().normalize();
387        assertChildren(document.getDocumentElement(), "<br>", "<br>", "<br>");
388    }
389
390    public void testRetainingComments() throws Exception {
391        String xml = "<foo>ABC<!-- bar -->DEF<!-- baz -->GHI</foo>";
392        parse(xml);
393        domConfiguration.setParameter("comments", true);
394        document.normalizeDocument();
395        assertEquals(xml, domToString(document));
396    }
397
398    public void testCommentContainingDoubleDash() throws Exception {
399        ErrorRecorder errorRecorder = new ErrorRecorder();
400        domConfiguration.setParameter("error-handler", errorRecorder);
401        domConfiguration.setParameter("namespaces", false);
402        Element root = document.createElement("foo");
403        document.appendChild(root);
404        root.appendChild(document.createComment("ABC -- DEF"));
405        document.normalizeDocument();
406        errorRecorder.assertAllErrors(DOMError.SEVERITY_ERROR, "wf-invalid-character");
407    }
408
409    public void testStrippingComments() throws Exception {
410        String xml = "<foo>ABC<!-- bar -->DEF<!-- baz -->GHI</foo>";
411        parse(xml);
412        domConfiguration.setParameter("comments", false);
413        document.normalizeDocument();
414        assertChildren(document.getDocumentElement(), "ABCDEFGHI");
415    }
416
417    public void testSplittingCdataSectionsSplit() throws Exception {
418        ErrorRecorder errorRecorder = new ErrorRecorder();
419        domConfiguration.setParameter("split-cdata-sections", true);
420        domConfiguration.setParameter("error-handler", errorRecorder);
421        domConfiguration.setParameter("namespaces", false);
422        Element root = document.createElement("foo");
423        document.appendChild(root);
424        root.appendChild(document.createCDATASection("ABC]]>DEF]]>GHI"));
425        document.normalizeDocument();
426        errorRecorder.assertAllErrors(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
427        assertChildren(root, "<![CDATA[ABC]]]]>", "<![CDATA[>DEF]]]]>", "<![CDATA[>GHI]]>");
428    }
429
430    public void testSplittingCdataSectionsReportError() throws Exception {
431        ErrorRecorder errorRecorder = new ErrorRecorder();
432        domConfiguration.setParameter("split-cdata-sections", false);
433        domConfiguration.setParameter("error-handler", errorRecorder);
434        domConfiguration.setParameter("namespaces", false);
435        Element root = document.createElement("foo");
436        document.appendChild(root);
437        root.appendChild(document.createCDATASection("ABC]]>DEF"));
438        document.normalizeDocument();
439        errorRecorder.assertAllErrors(DOMError.SEVERITY_ERROR, "wf-invalid-character");
440    }
441
442    public void testInvalidCharactersCdata() throws Exception {
443        ErrorRecorder errorRecorder = new ErrorRecorder();
444        domConfiguration.setParameter("cdata-sections", true);
445        domConfiguration.setParameter("error-handler", errorRecorder);
446        domConfiguration.setParameter("namespaces", false);
447        Element root = document.createElement("foo");
448        document.appendChild(root);
449        CDATASection cdata = document.createCDATASection("");
450        root.appendChild(cdata);
451
452        for (int c = 0; c <= Character.MAX_VALUE; c++) {
453            cdata.setData(new String(new char[]{ 'A', 'B', (char) c }));
454            document.normalizeDocument();
455            if (isValid((char) c)) {
456                assertEquals(Collections.<DOMError>emptyList(), errorRecorder.errors);
457            } else {
458                errorRecorder.assertAllErrors("For character " + c,
459                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
460            }
461        }
462    }
463
464    public void testInvalidCharactersText() throws Exception {
465        ErrorRecorder errorRecorder = new ErrorRecorder();
466        domConfiguration.setParameter("error-handler", errorRecorder);
467        domConfiguration.setParameter("namespaces", false);
468        Element root = document.createElement("foo");
469        document.appendChild(root);
470        Text text = document.createTextNode("");
471        root.appendChild(text);
472
473        for (int c = 0; c <= Character.MAX_VALUE; c++) {
474            text.setData(new String(new char[]{ 'A', 'B', (char) c }));
475            document.normalizeDocument();
476            if (isValid((char) c)) {
477                assertEquals(Collections.<DOMError>emptyList(), errorRecorder.errors);
478            } else {
479                errorRecorder.assertAllErrors("For character " + c,
480                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
481            }
482        }
483    }
484
485    public void testInvalidCharactersAttribute() throws Exception {
486        ErrorRecorder errorRecorder = new ErrorRecorder();
487        domConfiguration.setParameter("error-handler", errorRecorder);
488        domConfiguration.setParameter("namespaces", false);
489        Element root = document.createElement("foo");
490        document.appendChild(root);
491
492        for (int c = 0; c <= Character.MAX_VALUE; c++) {
493            root.setAttribute("bar", new String(new char[] { 'A', 'B', (char) c}));
494            document.normalizeDocument();
495            if (isValid((char) c)) {
496                assertEquals(Collections.<DOMError>emptyList(), errorRecorder.errors);
497            } else {
498                errorRecorder.assertAllErrors("For character " + c,
499                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
500            }
501        }
502    }
503
504    public void testInvalidCharactersComment() throws Exception {
505        ErrorRecorder errorRecorder = new ErrorRecorder();
506        domConfiguration.setParameter("error-handler", errorRecorder);
507        domConfiguration.setParameter("namespaces", false);
508        Element root = document.createElement("foo");
509        document.appendChild(root);
510        Comment comment = document.createComment("");
511        root.appendChild(comment);
512
513        for (int c = 0; c <= Character.MAX_VALUE; c++) {
514            comment.setData(new String(new char[] { 'A', 'B', (char) c}));
515            document.normalizeDocument();
516            if (isValid((char) c)) {
517                assertEquals(Collections.<DOMError>emptyList(), errorRecorder.errors);
518            } else {
519                errorRecorder.assertAllErrors("For character " + c,
520                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
521            }
522        }
523    }
524
525    public void testInvalidCharactersProcessingInstructionData() throws Exception {
526        ErrorRecorder errorRecorder = new ErrorRecorder();
527        domConfiguration.setParameter("error-handler", errorRecorder);
528        domConfiguration.setParameter("namespaces", false);
529        Element root = document.createElement("foo");
530        document.appendChild(root);
531        ProcessingInstruction pi = document.createProcessingInstruction("foo", "");
532        root.appendChild(pi);
533
534        for (int c = 0; c <= Character.MAX_VALUE; c++) {
535            pi.setData(new String(new char[] { 'A', 'B', (char) c}));
536            document.normalizeDocument();
537            if (isValid((char) c)) {
538                assertEquals(Collections.<DOMError>emptyList(), errorRecorder.errors);
539            } else {
540                errorRecorder.assertAllErrors("For character " + c,
541                        DOMError.SEVERITY_ERROR, "wf-invalid-character");
542            }
543        }
544    }
545
546    // TODO: test for surrogates
547
548    private boolean isValid(char c) {
549        // as defined by http://www.w3.org/TR/REC-xml/#charsets.
550        return c == 0x9 || c == 0xA || c == 0xD || (c >= 0x20 && c <= 0xd7ff)
551                || (c >= 0xe000 && c <= 0xfffd);
552    }
553
554    private Document createDocumentWithAdjacentTexts(String... texts) throws Exception {
555        Document result = DocumentBuilderFactory.newInstance()
556                .newDocumentBuilder().newDocument();
557        Element root = result.createElement("foo");
558        result.appendChild(root);
559        for (String text : texts) {
560            if (text.equals("<br>")) {
561                root.appendChild(result.createElement("br"));
562            } else {
563                root.appendChild(result.createTextNode(text));
564            }
565        }
566        return result;
567    }
568
569    private void assertChildren(Element element, String... texts) {
570        List<String> actual = new ArrayList<String>();
571        NodeList nodes = element.getChildNodes();
572        for (int i = 0; i < nodes.getLength(); i++) {
573            Node node = nodes.item(i);
574            if (node.getNodeType() == Node.TEXT_NODE) {
575                actual.add(((Text) node).getData());
576            } else if (node.getNodeType() == Node.CDATA_SECTION_NODE) {
577                actual.add("<![CDATA[" + ((CDATASection) node).getData() + "]]>");
578            } else {
579                actual.add("<" + node.getNodeName() + ">");
580            }
581        }
582        assertEquals(Arrays.asList(texts), actual);
583    }
584
585    private void parse(String xml) throws Exception {
586        document = DocumentBuilderFactory.newInstance().newDocumentBuilder()
587                .parse(new InputSource(new StringReader(xml)));
588        domConfiguration = document.getDomConfig();
589    }
590
591    private String domToString(Document document) throws TransformerException {
592        StringWriter writer = new StringWriter();
593        Transformer transformer = TransformerFactory.newInstance() .newTransformer();
594        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
595        transformer.transform(new DOMSource(document), new StreamResult(writer));
596        return writer.toString();
597    }
598
599    private class ErrorRecorder implements DOMErrorHandler {
600        private final List<DOMError> errors = new ArrayList<DOMError>();
601
602        public boolean handleError(DOMError error) {
603            errors.add(error);
604            return true;
605        }
606
607        public void assertAllErrors(int severity, String type) {
608            assertAllErrors("Expected one or more " + type + " errors", severity, type);
609        }
610
611        public void assertAllErrors(String message, int severity, String type) {
612            assertFalse(message, errors.isEmpty());
613            for (DOMError error : errors) {
614                assertEquals(message, severity, error.getSeverity());
615                assertEquals(message, type, error.getType());
616            }
617            errors.clear();
618        }
619    }
620}
621