1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * This is a series of unit tests for snippet creation and highlighting
19 *
20 * You can run this entire test case with:
21 *   runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
22 */
23package com.android.emailcommon.utility;
24
25import android.test.AndroidTestCase;
26import android.test.suitebuilder.annotation.SmallTest;
27
28import android.text.SpannableStringBuilder;
29import android.text.style.BackgroundColorSpan;
30
31@SmallTest
32public class TextUtilitiesTests extends AndroidTestCase {
33
34    public void testPlainSnippet() {
35        // Test the simplest cases
36        assertEquals("", TextUtilities.makeSnippetFromPlainText(null));
37        assertEquals("", TextUtilities.makeSnippetFromPlainText(""));
38
39        // Test handling leading, trailing, and duplicated whitespace
40        // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
41        // other whitespace should be fine as well
42        assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n"));
43        char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
44        assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c));
45        assertEquals("foo bar",
46                TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
47
48        // Handle duplicated - and =
49        assertEquals("Foo-Bar=Bletch",
50                TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
51
52        // We shouldn't muck with HTML entities
53        assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >"));
54    }
55
56    public void testHtmlSnippet() {
57        // Test the simplest cases
58        assertEquals("", TextUtilities.makeSnippetFromHtmlText(null));
59        assertEquals("", TextUtilities.makeSnippetFromHtmlText(""));
60
61        // Test handling leading, trailing, and duplicated whitespace
62        // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
63        // other whitespace should be fine as well
64        assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n"));
65        char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
66        assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
67        assertEquals("foo bar",
68                TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
69
70        // Handle duplicated - and =
71        assertEquals("Foo-Bar=Bletch",
72                TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
73
74        // We should catch HTML entities in these tests
75        assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >"));
76        assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&amp;&lt;&gt;&nbsp;&quot;"));
77        // Test for decimal and hex entities
78        assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#65;&#66;&#67;"));
79        assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#x41;&#x42;&#x43;"));
80
81        // Test for stripping simple tags
82        assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>"));
83        // TODO: Add tests here if/when we find problematic HTML
84    }
85
86    public void testStripHtmlEntityEdgeCases() {
87        int[] skipCount = new int[1];
88        // Bare & isn't an entity
89        char c = TextUtilities.stripHtmlEntity("&", 0, skipCount);
90        assertEquals(c, '&');
91        assertEquals(0, skipCount[0]);
92        // Also not legal
93        c = TextUtilities.stripHtmlEntity("&;", 0, skipCount);
94        assertEquals(c, '&');
95        assertEquals(0, skipCount[0]);
96        // This is an entity, but shouldn't be found
97        c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount);
98        assertEquals(c, '&');
99        assertEquals(0, skipCount[0]);
100        // This is too long for an entity, even though it starts like a valid one
101        c = TextUtilities.stripHtmlEntity("&nbspandmore;", 0, skipCount);
102        assertEquals(c, '&');
103        assertEquals(0, skipCount[0]);
104        // Illegal decimal entities
105        c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount);
106        assertEquals(c, '&');
107        assertEquals(0, skipCount[0]);
108        c = TextUtilities.stripHtmlEntity("&#12B", 0, skipCount);
109        assertEquals(c, '&');
110        assertEquals(0, skipCount[0]);
111        // Illegal hex entities
112        c = TextUtilities.stripHtmlEntity("&#xABC", 0, skipCount);
113        assertEquals(c, '&');
114        assertEquals(0, skipCount[0]);
115        // Illegal hex entities
116        c = TextUtilities.stripHtmlEntity("&#x19G", 0, skipCount);
117        assertEquals(c, '&');
118        assertEquals(0, skipCount[0]);
119    }
120
121    public void testStripContent() {
122        assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
123            "<html><style foo=\"bar\">Not</style>Visible</html>"));
124        assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
125            "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
126        assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText(
127            "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
128        assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
129            "<html>Visible<style foo=\"bar\">Not"));
130        assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
131            "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
132        assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
133            "<html>Visible<style foo=\"bar\"/>AgainVisible"));
134        assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
135            "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
136    }
137
138    /**
139     * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
140     * for the tag named 'tag' and then check whether the calculated end position matches the known
141     * correct position.  HTML text not containing an ampersand should generate a calculated end of
142     * -1
143     * @param text the HTML text to test
144     */
145    private void findTagEnd(String text, String tag) {
146        int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0);
147        int knownEnd = text.indexOf('@') + 2;
148        if (knownEnd == 1) {
149            // indexOf will return -1, so we'll get 1 as knownEnd
150            assertEquals(-1, calculatedEnd);
151        } else {
152            assertEquals(calculatedEnd, knownEnd);
153        }
154    }
155
156    public void testFindTagEnd() {
157        // Test with <tag ... />
158        findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
159        // Test with <tag ...> ... </tag>
160        findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
161        // Test with incomplete tag
162        findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
163        // Test with space at end of tag
164        findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
165    }
166
167    private void assertHighlightUnchanged(String str) {
168        assertEquals(str, TextUtilities.highlightTermsInHtml(str, null));
169    }
170
171    public void testHighlightNoTerm() {
172        // With no search terms, the html should be unchanged
173        assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>");
174        assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>");
175        assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not");
176        assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible");
177        assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible");
178        assertHighlightUnchanged(
179                "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible");
180    }
181
182    public void testHighlightSingleTermHtml() {
183        String str = "<html><style foo=\"bar\">Not</style>Visible</html>";
184        // Test that tags aren't highlighted
185        assertEquals(str, TextUtilities.highlightTermsInHtml(
186                "<html><style foo=\"bar\">Not</style>Visible</html>", "style"));
187        // Test that non-tags are
188        assertEquals("<html><style foo=\"bar\">Not</style><span " +
189                "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
190                "\">Visi</span>ble</html>",
191                TextUtilities.highlightTermsInHtml(str, "Visi"));
192        assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" +
193                " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
194                "\">gain</span>Visible",
195                TextUtilities.highlightTermsInHtml(
196                        "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain"));
197    }
198
199    public void brokentestHighlightSingleTermText() {
200        // Sprinkle text with a few HTML characters to make sure they're ignored
201        String text = "This< should be visibl>e";
202        // We should find this, because search terms are case insensitive
203        SpannableStringBuilder ssb =
204            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi");
205        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
206        assertEquals(1, spans.length);
207        BackgroundColorSpan span = spans[0];
208        assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
209        assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span));
210        // Heh; this next test fails.. we use the search term!
211        assertEquals(text, ssb.toString());
212
213        // Multiple instances of the term
214        text = "The research word should be a search result";
215        ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search");
216        spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
217        assertEquals(2, spans.length);
218        span = spans[0];
219        assertEquals(text.indexOf("search word"), ssb.getSpanStart(span));
220        assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span));
221        span = spans[1];
222        assertEquals(text.indexOf("search result"), ssb.getSpanStart(span));
223        assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span));
224        assertEquals(text, ssb.toString());
225    }
226
227    public void brokentestHighlightTwoTermText() {
228        String text = "This should be visible";
229        // We should find this, because search terms are case insensitive
230        SpannableStringBuilder ssb =
231            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should");
232        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
233        assertEquals(2, spans.length);
234        BackgroundColorSpan span = spans[0];
235        assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
236        assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
237        span = spans[1];
238        assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
239        assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span));
240        assertEquals(text, ssb.toString());
241    }
242
243    public void brokentestHighlightDuplicateTermText() {
244        String text = "This should be visible";
245        // We should find this, because search terms are case insensitive
246        SpannableStringBuilder ssb =
247            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should");
248        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
249        assertEquals(1, spans.length);
250        BackgroundColorSpan span = spans[0];
251        assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
252        assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
253    }
254
255    public void brokentestHighlightOverlapTermText() {
256        String text = "This shoulder is visible";
257        // We should find this, because search terms are case insensitive
258        SpannableStringBuilder ssb =
259            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould");
260        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
261        assertEquals(1, spans.length);
262        BackgroundColorSpan span = spans[0];
263        assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
264        assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span));
265    }
266
267
268    public void brokentestHighlightOverlapTermText2() {
269        String text = "The shoulders are visible";
270        // We should find this, because search terms are case insensitive
271        SpannableStringBuilder ssb =
272            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders");
273        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
274        assertEquals(2, spans.length);
275        BackgroundColorSpan span = spans[0];
276        assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span));
277        assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span));
278        span = spans[1];
279        // Just the 's' should be caught in the 2nd span
280        assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span));
281        assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span));
282        assertEquals(text, ssb.toString());
283    }
284    // For debugging large HTML samples
285
286//    private String readLargeSnippet(String fn) {
287//        File file = mContext.getFileStreamPath(fn);
288//        StringBuffer sb = new StringBuffer();
289//        BufferedReader reader = null;
290//        try {
291//            String text;
292//            reader = new BufferedReader(new FileReader(file));
293//            while ((text = reader.readLine()) != null) {
294//                sb.append(text);
295//                sb.append(" ");
296//            }
297//        } catch (IOException e) {
298//        }
299//        return sb.toString();
300//    }
301 }
302