15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h" 11868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string16.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/snippet.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class QueryNodeList; 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Used by HasMatchIn. 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct QueryWord { 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The work to match against. 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16 word; 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The starting position of the word in the original text. 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t position; 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// QueryNode is used by QueryParser to represent the elements that constitute a 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// query. While QueryNode is exposed by way of ParseQuery, it really isn't meant 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for external usage. 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class QueryNode { 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual ~QueryNode() {} 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Serialize ourselves out to a string that can be passed to SQLite. Returns 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the number of words in this node. 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual int AppendToSQLiteQuery(string16* query) const = 0; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return true if this is a QueryNodeWord, false if it's a QueryNodeList. 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool IsWord() const = 0; 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if this node matches |word|. If |exact| is true, the string 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // must exactly match. Otherwise, this uses a starts with comparison. 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool Matches(const string16& word, bool exact) const = 0; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if this node matches at least one of the words in |words|. An 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // entry is added to |match_positions| for all matching words giving the 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // matching regions. 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool HasMatchIn(const std::vector<QueryWord>& words, 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Snippet::MatchPositions* match_positions) const = 0; 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch // Returns true if this node matches at least one of the words in |words|. 50eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0; 51eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Appends the words that make up this node in |words|. 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void AppendWords(std::vector<string16>* words) const = 0; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This class is used to parse queries entered into the history search into more 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// normalized queries that can be passed to the SQLite backend. 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class QueryParser { 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) QueryParser(); 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For CJK ideographs and Korean Hangul, even a single character 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // can be useful in prefix matching, but that may give us too many 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // false positives. Moreover, the current ICU word breaker gives us 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // back every single Chinese character as a word so that there's no 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // point doing anything for them and we only adjust the minimum length 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to 2 for Korean Hangul while using 3 for others. This is a temporary 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // hack until we have a segmentation support. 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool IsWordLongEnoughForPrefixSearch(const string16& word); 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parse a query into a SQLite query. The resulting query is placed in 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // |sqlite_query| and the number of words is returned. 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ParseQuery(const string16& query, string16* sqlite_query); 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parses |query|, returning the words that make up it. Any words in quotes 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // are put in |words| without the quotes. For example, the query text 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // "foo bar" results in two entries being added to words, one for foo and one 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // for bar. 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void ParseQueryWords(const string16& query, std::vector<string16>* words); 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parses |query|, returning the nodes that constitute the valid words in the 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // query. This is intended for later usage with DoesQueryMatch. Ownership of 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the nodes passes to the caller. 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void ParseQueryNodes(const string16& query, std::vector<QueryNode*>* nodes); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if the string text matches the query nodes created by a call 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to ParseQuery. If the query does match, each of the matching positions in 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the text is added to |match_positions|. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool DoesQueryMatch(const string16& text, 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::vector<QueryNode*>& nodes, 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Snippet::MatchPositions* match_positions); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 93eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch // Returns true if all of the |words| match the query |nodes| created by a 94eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch // call to ParseQuery. 95eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch bool DoesQueryMatch(const std::vector<QueryWord>& words, 96eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch const std::vector<QueryNode*>& nodes); 97eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch 98eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch // Extracts the words from |text|, placing each word into |words|. 99eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch void ExtractQueryWords(const string16& text, std::vector<QueryWord>* words); 100eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Does the work of parsing |query|; creates nodes in |root| as appropriate. 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This is invoked from both of the ParseQuery methods. 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool ParseQueryImpl(const string16& query, QueryNodeList* root); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(QueryParser); 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 110