1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 6#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 7 8#include <vector> 9 10#include "base/basictypes.h" 11#include "base/strings/string16.h" 12#include "chrome/browser/history/snippet.h" 13 14class QueryNodeList; 15 16// Used by HasMatchIn. 17struct QueryWord { 18 // The work to match against. 19 string16 word; 20 21 // The starting position of the word in the original text. 22 size_t position; 23}; 24 25// QueryNode is used by QueryParser to represent the elements that constitute a 26// query. While QueryNode is exposed by way of ParseQuery, it really isn't meant 27// for external usage. 28class QueryNode { 29 public: 30 virtual ~QueryNode() {} 31 32 // Serialize ourselves out to a string that can be passed to SQLite. Returns 33 // the number of words in this node. 34 virtual int AppendToSQLiteQuery(string16* query) const = 0; 35 36 // Return true if this is a QueryNodeWord, false if it's a QueryNodeList. 37 virtual bool IsWord() const = 0; 38 39 // Returns true if this node matches |word|. If |exact| is true, the string 40 // must exactly match. Otherwise, this uses a starts with comparison. 41 virtual bool Matches(const string16& word, bool exact) const = 0; 42 43 // Returns true if this node matches at least one of the words in |words|. An 44 // entry is added to |match_positions| for all matching words giving the 45 // matching regions. 46 virtual bool HasMatchIn(const std::vector<QueryWord>& words, 47 Snippet::MatchPositions* match_positions) const = 0; 48 49 // Returns true if this node matches at least one of the words in |words|. 50 virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0; 51 52 // Appends the words that make up this node in |words|. 53 virtual void AppendWords(std::vector<string16>* words) const = 0; 54}; 55 56// This class is used to parse queries entered into the history search into more 57// normalized queries that can be passed to the SQLite backend. 58class QueryParser { 59 public: 60 QueryParser(); 61 62 // For CJK ideographs and Korean Hangul, even a single character 63 // can be useful in prefix matching, but that may give us too many 64 // false positives. Moreover, the current ICU word breaker gives us 65 // back every single Chinese character as a word so that there's no 66 // point doing anything for them and we only adjust the minimum length 67 // to 2 for Korean Hangul while using 3 for others. This is a temporary 68 // hack until we have a segmentation support. 69 static bool IsWordLongEnoughForPrefixSearch(const string16& word); 70 71 // Parse a query into a SQLite query. The resulting query is placed in 72 // |sqlite_query| and the number of words is returned. 73 int ParseQuery(const string16& query, string16* sqlite_query); 74 75 // Parses |query|, returning the words that make up it. Any words in quotes 76 // are put in |words| without the quotes. For example, the query text 77 // "foo bar" results in two entries being added to words, one for foo and one 78 // for bar. 79 void ParseQueryWords(const string16& query, std::vector<string16>* words); 80 81 // Parses |query|, returning the nodes that constitute the valid words in the 82 // query. This is intended for later usage with DoesQueryMatch. Ownership of 83 // the nodes passes to the caller. 84 void ParseQueryNodes(const string16& query, std::vector<QueryNode*>* nodes); 85 86 // Returns true if the string text matches the query nodes created by a call 87 // to ParseQuery. If the query does match, each of the matching positions in 88 // the text is added to |match_positions|. 89 bool DoesQueryMatch(const string16& text, 90 const std::vector<QueryNode*>& nodes, 91 Snippet::MatchPositions* match_positions); 92 93 // Returns true if all of the |words| match the query |nodes| created by a 94 // call to ParseQuery. 95 bool DoesQueryMatch(const std::vector<QueryWord>& words, 96 const std::vector<QueryNode*>& nodes); 97 98 // Extracts the words from |text|, placing each word into |words|. 99 void ExtractQueryWords(const string16& text, std::vector<QueryWord>* words); 100 101 private: 102 // Does the work of parsing |query|; creates nodes in |root| as appropriate. 103 // This is invoked from both of the ParseQuery methods. 104 bool ParseQueryImpl(const string16& query, QueryNodeList* root); 105 106 DISALLOW_COPY_AND_ASSIGN(QueryParser); 107}; 108 109#endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 110