1cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng/*
2cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Copyright (C) 2012 The Android Open Source Project
3cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng *
4cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Licensed under the Apache License, Version 2.0 (the "License");
5cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * you may not use this file except in compliance with the License.
6cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * You may obtain a copy of the License at
7cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng *
8cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng *      http://www.apache.org/licenses/LICENSE-2.0
9cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng *
10cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Unless required by applicable law or agreed to in writing, software
11cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * distributed under the License is distributed on an "AS IS" BASIS,
12cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * See the License for the specific language governing permissions and
14cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * limitations under the License.
15cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */
16cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
17cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Chengpackage com.android.contacts.common.util;
18cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
19cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Chengimport com.google.common.annotations.VisibleForTesting;
20cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
21cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng/**
22cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Methods related to search.
23cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */
24cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Chengpublic class SearchUtil {
25cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
26cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    public static class MatchedLine {
27cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
28cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        public int startIndex = -1;
29cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        public String line;
30cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
31cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        @Override
32cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        public String toString() {
33cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            return "MatchedLine{" +
34cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                    "line='" + line + '\'' +
35cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                    ", startIndex=" + startIndex +
36cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                    '}';
37cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
38cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    }
39cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
40cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    /**
41cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * Given a string with lines delimited with '\n', finds the matching line to the given
42cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * substring.
43cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     *
44cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param contents The string to search.
45cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param substring The substring to search for.
46cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @return A MatchedLine object containing the matching line and the startIndex of the substring
47cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * match within that line.
48cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     */
49cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    public static MatchedLine findMatchingLine(String contents, String substring) {
50cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        final MatchedLine matched = new MatchedLine();
51cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
52cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // Snippet may contain multiple lines separated by "\n".
53cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // Locate the lines of the content that contain the substring.
54cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        final int index = SearchUtil.contains(contents, substring);
55cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        if (index != -1) {
56cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            // Match found.  Find the corresponding line.
57cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            int start = index - 1;
58cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            while (start > -1) {
59cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                if (contents.charAt(start) == '\n') {
60cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                    break;
61cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                }
62cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                start--;
63cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
64cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            int end = index + 1;
65cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            while (end < contents.length()) {
66cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                if (contents.charAt(end) == '\n') {
67cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                    break;
68cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                }
69cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                end++;
70cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
71cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            matched.line = contents.substring(start + 1, end);
72cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            matched.startIndex = index - (start + 1);
73cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
74cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        return matched;
75cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    }
76cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
77cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    /**
78cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * Similar to String.contains() with two main differences:
79cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * <p>
80cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * 1) Only searches token prefixes.  A token is defined as any combination of letters or
81cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * numbers.
82cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * <p>
83cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * 2) Returns the starting index where the substring is found.
84cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     *
85cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param value The string to search.
86cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param substring The substring to look for.
87cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @return The starting index where the substring is found. {@literal -1} if substring is not
88cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     *         found in value.
89cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     */
90cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    @VisibleForTesting
91cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    static int contains(String value, String substring) {
92cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        if (value.length() < substring.length()) {
93cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            return -1;
94cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
95cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
96cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // i18n support
97cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // Generate the code points for the substring once.
98cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // There will be a maximum of substring.length code points.  But may be fewer.
99cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // Since the array length is not an accurate size, we need to keep a separate variable.
100cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        final int[] substringCodePoints = new int[substring.length()];
101cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        int substringLength = 0;  // may not equal substring.length()!!
102cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        for (int i = 0; i < substring.length(); ) {
103cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            final int codePoint = Character.codePointAt(substring, i);
104cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            substringCodePoints[substringLength] = codePoint;
105cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            substringLength++;
106cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            i += Character.charCount(codePoint);
107cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
108cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
109cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
110cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            int numMatch = 0;
1114aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner            for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
1124aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner                int valueCp = Character.toLowerCase(value.codePointAt(j));
1134aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner                int substringCp = substringCodePoints[numMatch];
1144aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner                if (valueCp != substringCp) {
1154aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner                    break;
116cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                }
1174aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner                j += Character.charCount(valueCp);
1184aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner            }
1194aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner            if (numMatch == substringLength) {
1204aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner                return i;
121cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
122cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
123cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        return -1;
124cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    }
125cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
126cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    /**
127cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * Find the start of the next token.  A token is composed of letters and numbers. Any other
128cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * character are considered delimiters.
129cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     *
130cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param line The string to search for the next token.
131cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param startIndex The index to start searching.  0 based indexing.
132cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @return The index for the start of the next token.  line.length() if next token not found.
133cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     */
134cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    @VisibleForTesting
135cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    static int findNextTokenStart(String line, int startIndex) {
136cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        int index = startIndex;
137cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
138cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // If already in token, eat remainder of token.
139cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        while (index <= line.length()) {
140cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (index == line.length()) {
141cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                // No more tokens.
142cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                return index;
143cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
144cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            final int codePoint = line.codePointAt(index);
145cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (!Character.isLetterOrDigit(codePoint)) {
146cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                break;
147cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
148cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            index += Character.charCount(codePoint);
149cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
150cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
151cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // Out of token, eat all consecutive delimiters.
152cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        while (index <= line.length()) {
153cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (index == line.length()) {
154cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                return index;
155cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
156cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            final int codePoint = line.codePointAt(index);
157cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (Character.isLetterOrDigit(codePoint)) {
158cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                break;
159cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
160cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            index += Character.charCount(codePoint);
161cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
162cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
163cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        return index;
164cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    }
165cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
166cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    /**
167cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * Anything other than letter and numbers are considered delimiters.  Remove start and end
168cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * delimiters since they are not relevant to search.
169cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     *
170cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @param query The query string to clean.
171cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     * @return The cleaned query. Empty string if all characters are cleaned out.
172cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng     */
173cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    public static String cleanStartAndEndOfSearchQuery(String query) {
174cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        int start = 0;
175cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        while (start < query.length()) {
176cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            int codePoint = query.codePointAt(start);
177cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (Character.isLetterOrDigit(codePoint)) {
178cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                break;
179cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
180cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            start += Character.charCount(codePoint);
181cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
182cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
183cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        if (start == query.length()) {
184cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            // All characters are delimiters.
185cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            return "";
186cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
187cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
188cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        int end = query.length() - 1;
189cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        while (end > -1) {
190cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (Character.isLowSurrogate(query.charAt(end))) {
191cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                // Assume valid i18n string.  There should be a matching high surrogate before it.
192cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                end--;
193cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
194cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            int codePoint = query.codePointAt(end);
195cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            if (Character.isLetterOrDigit(codePoint)) {
196cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng                break;
197cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            }
198cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng            end--;
199cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        }
200cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng
201cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        // end is a letter or digit.
202cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng        return query.substring(start, end + 1);
203cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng    }
204cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng}
205