1cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng/* 2cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Copyright (C) 2012 The Android Open Source Project 3cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 4cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Licensed under the Apache License, Version 2.0 (the "License"); 5cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * you may not use this file except in compliance with the License. 6cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * You may obtain a copy of the License at 7cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 8cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * http://www.apache.org/licenses/LICENSE-2.0 9cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 10cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Unless required by applicable law or agreed to in writing, software 11cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * distributed under the License is distributed on an "AS IS" BASIS, 12cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * See the License for the specific language governing permissions and 14cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * limitations under the License. 15cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */ 16cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 17cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Chengpackage com.android.contacts.common.util; 18cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 19cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Chengimport com.google.common.annotations.VisibleForTesting; 20cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 21cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng/** 22cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Methods related to search. 23cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */ 24cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Chengpublic class SearchUtil { 25cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 26cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng public static class MatchedLine { 27cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 28cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng public int startIndex = -1; 29cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng public String line; 30cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 31cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng @Override 32cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng public String toString() { 33cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return "MatchedLine{" + 34cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng "line='" + line + '\'' + 35cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng ", startIndex=" + startIndex + 36cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng '}'; 37cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 38cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 39cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 40cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng /** 41cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Given a string with lines delimited with '\n', finds the matching line to the given 42cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * substring. 43cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 44cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param contents The string to search. 45cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param substring The substring to search for. 46cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @return A MatchedLine object containing the matching line and the startIndex of the substring 47cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * match within that line. 48cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */ 49cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng public static MatchedLine findMatchingLine(String contents, String substring) { 50cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng final MatchedLine matched = new MatchedLine(); 51cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 52cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Snippet may contain multiple lines separated by "\n". 53cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Locate the lines of the content that contain the substring. 54cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng final int index = SearchUtil.contains(contents, substring); 55cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (index != -1) { 56cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Match found. Find the corresponding line. 57cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int start = index - 1; 58cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng while (start > -1) { 59cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (contents.charAt(start) == '\n') { 60cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng break; 61cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 62cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng start--; 63cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 64cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int end = index + 1; 65cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng while (end < contents.length()) { 66cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (contents.charAt(end) == '\n') { 67cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng break; 68cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 69cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng end++; 70cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 71cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng matched.line = contents.substring(start + 1, end); 72cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng matched.startIndex = index - (start + 1); 73cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 74cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return matched; 75cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 76cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 77cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng /** 78cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Similar to String.contains() with two main differences: 79cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * <p> 80cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 1) Only searches token prefixes. A token is defined as any combination of letters or 81cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * numbers. 82cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * <p> 83cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 2) Returns the starting index where the substring is found. 84cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 85cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param value The string to search. 86cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param substring The substring to look for. 87cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @return The starting index where the substring is found. {@literal -1} if substring is not 88cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * found in value. 89cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */ 90cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng @VisibleForTesting 91cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng static int contains(String value, String substring) { 92cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (value.length() < substring.length()) { 93cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return -1; 94cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 95cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 96cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // i18n support 97cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Generate the code points for the substring once. 98cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // There will be a maximum of substring.length code points. But may be fewer. 99cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Since the array length is not an accurate size, we need to keep a separate variable. 100cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng final int[] substringCodePoints = new int[substring.length()]; 101cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int substringLength = 0; // may not equal substring.length()!! 102cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng for (int i = 0; i < substring.length(); ) { 103cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng final int codePoint = Character.codePointAt(substring, i); 104cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng substringCodePoints[substringLength] = codePoint; 105cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng substringLength++; 106cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng i += Character.charCount(codePoint); 107cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 108cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 109cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) { 110cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int numMatch = 0; 1114aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) { 1124aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner int valueCp = Character.toLowerCase(value.codePointAt(j)); 1134aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner int substringCp = substringCodePoints[numMatch]; 1144aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner if (valueCp != substringCp) { 1154aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner break; 116cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 1174aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner j += Character.charCount(valueCp); 1184aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner } 1194aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner if (numMatch == substringLength) { 1204aa2f3e0fec6e8d58a1c32a232925770faeaa550Jay Shrauner return i; 121cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 122cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 123cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return -1; 124cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 125cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 126cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng /** 127cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Find the start of the next token. A token is composed of letters and numbers. Any other 128cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * character are considered delimiters. 129cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 130cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param line The string to search for the next token. 131cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param startIndex The index to start searching. 0 based indexing. 132cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @return The index for the start of the next token. line.length() if next token not found. 133cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */ 134cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng @VisibleForTesting 135cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng static int findNextTokenStart(String line, int startIndex) { 136cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int index = startIndex; 137cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 138cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // If already in token, eat remainder of token. 139cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng while (index <= line.length()) { 140cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (index == line.length()) { 141cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // No more tokens. 142cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return index; 143cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 144cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng final int codePoint = line.codePointAt(index); 145cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (!Character.isLetterOrDigit(codePoint)) { 146cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng break; 147cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 148cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng index += Character.charCount(codePoint); 149cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 150cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 151cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Out of token, eat all consecutive delimiters. 152cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng while (index <= line.length()) { 153cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (index == line.length()) { 154cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return index; 155cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 156cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng final int codePoint = line.codePointAt(index); 157cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (Character.isLetterOrDigit(codePoint)) { 158cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng break; 159cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 160cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng index += Character.charCount(codePoint); 161cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 162cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 163cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return index; 164cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 165cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 166cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng /** 167cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * Anything other than letter and numbers are considered delimiters. Remove start and end 168cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * delimiters since they are not relevant to search. 169cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * 170cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @param query The query string to clean. 171cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng * @return The cleaned query. Empty string if all characters are cleaned out. 172cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng */ 173cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng public static String cleanStartAndEndOfSearchQuery(String query) { 174cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int start = 0; 175cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng while (start < query.length()) { 176cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int codePoint = query.codePointAt(start); 177cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (Character.isLetterOrDigit(codePoint)) { 178cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng break; 179cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 180cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng start += Character.charCount(codePoint); 181cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 182cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 183cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (start == query.length()) { 184cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // All characters are delimiters. 185cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return ""; 186cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 187cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 188cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int end = query.length() - 1; 189cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng while (end > -1) { 190cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (Character.isLowSurrogate(query.charAt(end))) { 191cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // Assume valid i18n string. There should be a matching high surrogate before it. 192cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng end--; 193cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 194cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng int codePoint = query.codePointAt(end); 195cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng if (Character.isLetterOrDigit(codePoint)) { 196cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng break; 197cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 198cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng end--; 199cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 200cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng 201cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng // end is a letter or digit. 202cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng return query.substring(start, end + 1); 203cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng } 204cae605bed4d43e1925fc8c1803def0ef1d0924a5Chiao Cheng} 205