15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
11868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string16.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/snippet.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class QueryNodeList;
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Used by HasMatchIn.
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct QueryWord {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The work to match against.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16 word;
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The starting position of the word in the original text.
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t position;
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// QueryNode is used by QueryParser to represent the elements that constitute a
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// query. While QueryNode is exposed by way of ParseQuery, it really isn't meant
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for external usage.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class QueryNode {
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual ~QueryNode() {}
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Serialize ourselves out to a string that can be passed to SQLite. Returns
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the number of words in this node.
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual int AppendToSQLiteQuery(string16* query) const = 0;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return true if this is a QueryNodeWord, false if it's a QueryNodeList.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual bool IsWord() const = 0;
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if this node matches |word|. If |exact| is true, the string
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // must exactly match. Otherwise, this uses a starts with comparison.
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual bool Matches(const string16& word, bool exact) const = 0;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if this node matches at least one of the words in |words|. An
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // entry is added to |match_positions| for all matching words giving the
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // matching regions.
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual bool HasMatchIn(const std::vector<QueryWord>& words,
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          Snippet::MatchPositions* match_positions) const = 0;
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // Returns true if this node matches at least one of the words in |words|.
50eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0;
51eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Appends the words that make up this node in |words|.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void AppendWords(std::vector<string16>* words) const = 0;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This class is used to parse queries entered into the history search into more
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// normalized queries that can be passed to the SQLite backend.
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class QueryParser {
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  QueryParser();
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For CJK ideographs and Korean Hangul, even a single character
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // can be useful in prefix matching, but that may give us too many
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // false positives. Moreover, the current ICU word breaker gives us
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // back every single Chinese character as a word so that there's no
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // point doing anything for them and we only adjust the minimum length
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to 2 for Korean Hangul while using 3 for others. This is a temporary
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // hack until we have a segmentation support.
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static bool IsWordLongEnoughForPrefixSearch(const string16& word);
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parse a query into a SQLite query. The resulting query is placed in
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // |sqlite_query| and the number of words is returned.
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int ParseQuery(const string16& query, string16* sqlite_query);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parses |query|, returning the words that make up it. Any words in quotes
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // are put in |words| without the quotes. For example, the query text
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "foo bar" results in two entries being added to words, one for foo and one
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // for bar.
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ParseQueryWords(const string16& query, std::vector<string16>* words);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parses |query|, returning the nodes that constitute the valid words in the
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // query. This is intended for later usage with DoesQueryMatch. Ownership of
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the nodes passes to the caller.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ParseQueryNodes(const string16& query, std::vector<QueryNode*>* nodes);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if the string text matches the query nodes created by a call
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to ParseQuery. If the query does match, each of the matching positions in
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the text is added to |match_positions|.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool DoesQueryMatch(const string16& text,
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const std::vector<QueryNode*>& nodes,
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      Snippet::MatchPositions* match_positions);
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
93eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // Returns true if all of the |words| match the query |nodes| created by a
94eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // call to ParseQuery.
95eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  bool DoesQueryMatch(const std::vector<QueryWord>& words,
96eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch                      const std::vector<QueryNode*>& nodes);
97eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
98eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // Extracts the words from |text|, placing each word into |words|.
99eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  void ExtractQueryWords(const string16& text, std::vector<QueryWord>* words);
100eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Does the work of parsing |query|; creates nodes in |root| as appropriate.
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is invoked from both of the ParseQuery methods.
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ParseQueryImpl(const string16& query, QueryNodeList* root);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(QueryParser);
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
110