1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.contacts.common.util;
18
19import com.google.common.annotations.VisibleForTesting;
20
21/**
22 * Methods related to search.
23 */
24public class SearchUtil {
25
26    public static class MatchedLine {
27
28        public int startIndex = -1;
29        public String line;
30
31        @Override
32        public String toString() {
33            return "MatchedLine{" +
34                    "line='" + line + '\'' +
35                    ", startIndex=" + startIndex +
36                    '}';
37        }
38    }
39
40    /**
41     * Given a string with lines delimited with '\n', finds the matching line to the given
42     * substring.
43     *
44     * @param contents The string to search.
45     * @param substring The substring to search for.
46     * @return A MatchedLine object containing the matching line and the startIndex of the substring
47     * match within that line.
48     */
49    public static MatchedLine findMatchingLine(String contents, String substring) {
50        final MatchedLine matched = new MatchedLine();
51
52        // Snippet may contain multiple lines separated by "\n".
53        // Locate the lines of the content that contain the substring.
54        final int index = SearchUtil.contains(contents, substring);
55        if (index != -1) {
56            // Match found.  Find the corresponding line.
57            int start = index - 1;
58            while (start > -1) {
59                if (contents.charAt(start) == '\n') {
60                    break;
61                }
62                start--;
63            }
64            int end = index + 1;
65            while (end < contents.length()) {
66                if (contents.charAt(end) == '\n') {
67                    break;
68                }
69                end++;
70            }
71            matched.line = contents.substring(start + 1, end);
72            matched.startIndex = index - (start + 1);
73        }
74        return matched;
75    }
76
77    /**
78     * Similar to String.contains() with two main differences:
79     * <p>
80     * 1) Only searches token prefixes.  A token is defined as any combination of letters or
81     * numbers.
82     * <p>
83     * 2) Returns the starting index where the substring is found.
84     *
85     * @param value The string to search.
86     * @param substring The substring to look for.
87     * @return The starting index where the substring is found. {@literal -1} if substring is not
88     *         found in value.
89     */
90    @VisibleForTesting
91    static int contains(String value, String substring) {
92        if (value.length() < substring.length()) {
93            return -1;
94        }
95
96        // i18n support
97        // Generate the code points for the substring once.
98        // There will be a maximum of substring.length code points.  But may be fewer.
99        // Since the array length is not an accurate size, we need to keep a separate variable.
100        final int[] substringCodePoints = new int[substring.length()];
101        int substringLength = 0;  // may not equal substring.length()!!
102        for (int i = 0; i < substring.length(); ) {
103            final int codePoint = Character.codePointAt(substring, i);
104            substringCodePoints[substringLength] = codePoint;
105            substringLength++;
106            i += Character.charCount(codePoint);
107        }
108
109        for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
110            int numMatch = 0;
111            for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
112                int valueCp = Character.toLowerCase(value.codePointAt(j));
113                int substringCp = substringCodePoints[numMatch];
114                if (valueCp != substringCp) {
115                    break;
116                }
117                j += Character.charCount(valueCp);
118            }
119            if (numMatch == substringLength) {
120                return i;
121            }
122        }
123        return -1;
124    }
125
126    /**
127     * Find the start of the next token.  A token is composed of letters and numbers. Any other
128     * character are considered delimiters.
129     *
130     * @param line The string to search for the next token.
131     * @param startIndex The index to start searching.  0 based indexing.
132     * @return The index for the start of the next token.  line.length() if next token not found.
133     */
134    @VisibleForTesting
135    static int findNextTokenStart(String line, int startIndex) {
136        int index = startIndex;
137
138        // If already in token, eat remainder of token.
139        while (index <= line.length()) {
140            if (index == line.length()) {
141                // No more tokens.
142                return index;
143            }
144            final int codePoint = line.codePointAt(index);
145            if (!Character.isLetterOrDigit(codePoint)) {
146                break;
147            }
148            index += Character.charCount(codePoint);
149        }
150
151        // Out of token, eat all consecutive delimiters.
152        while (index <= line.length()) {
153            if (index == line.length()) {
154                return index;
155            }
156            final int codePoint = line.codePointAt(index);
157            if (Character.isLetterOrDigit(codePoint)) {
158                break;
159            }
160            index += Character.charCount(codePoint);
161        }
162
163        return index;
164    }
165
166    /**
167     * Anything other than letter and numbers are considered delimiters.  Remove start and end
168     * delimiters since they are not relevant to search.
169     *
170     * @param query The query string to clean.
171     * @return The cleaned query. Empty string if all characters are cleaned out.
172     */
173    public static String cleanStartAndEndOfSearchQuery(String query) {
174        int start = 0;
175        while (start < query.length()) {
176            int codePoint = query.codePointAt(start);
177            if (Character.isLetterOrDigit(codePoint)) {
178                break;
179            }
180            start += Character.charCount(codePoint);
181        }
182
183        if (start == query.length()) {
184            // All characters are delimiters.
185            return "";
186        }
187
188        int end = query.length() - 1;
189        while (end > -1) {
190            if (Character.isLowSurrogate(query.charAt(end))) {
191                // Assume valid i18n string.  There should be a matching high surrogate before it.
192                end--;
193            }
194            int codePoint = query.codePointAt(end);
195            if (Character.isLetterOrDigit(codePoint)) {
196                break;
197            }
198            end--;
199        }
200
201        // end is a letter or digit.
202        return query.substring(start, end + 1);
203    }
204}
205