// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
6#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
7
8#include <vector>
9
10#include "base/basictypes.h"
11#include "base/strings/string16.h"
12#include "chrome/browser/history/snippet.h"
13
// Forward declaration: this header only refers to QueryNodeList through a
// pointer (see QueryParser::ParseQueryImpl), so the full definition is not
// needed here.
class QueryNodeList;
15
16// Used by HasMatchIn.
17struct QueryWord {
18  // The work to match against.
19  string16 word;
20
21  // The starting position of the word in the original text.
22  size_t position;
23};
24
25// QueryNode is used by QueryParser to represent the elements that constitute a
26// query. While QueryNode is exposed by way of ParseQuery, it really isn't meant
27// for external usage.
28class QueryNode {
29 public:
30  virtual ~QueryNode() {}
31
32  // Serialize ourselves out to a string that can be passed to SQLite. Returns
33  // the number of words in this node.
34  virtual int AppendToSQLiteQuery(string16* query) const = 0;
35
36  // Return true if this is a QueryNodeWord, false if it's a QueryNodeList.
37  virtual bool IsWord() const = 0;
38
39  // Returns true if this node matches |word|. If |exact| is true, the string
40  // must exactly match. Otherwise, this uses a starts with comparison.
41  virtual bool Matches(const string16& word, bool exact) const = 0;
42
43  // Returns true if this node matches at least one of the words in |words|. An
44  // entry is added to |match_positions| for all matching words giving the
45  // matching regions.
46  virtual bool HasMatchIn(const std::vector<QueryWord>& words,
47                          Snippet::MatchPositions* match_positions) const = 0;
48
49  // Returns true if this node matches at least one of the words in |words|.
50  virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0;
51
52  // Appends the words that make up this node in |words|.
53  virtual void AppendWords(std::vector<string16>* words) const = 0;
54};
55
56// This class is used to parse queries entered into the history search into more
57// normalized queries that can be passed to the SQLite backend.
58class QueryParser {
59 public:
60  QueryParser();
61
62  // For CJK ideographs and Korean Hangul, even a single character
63  // can be useful in prefix matching, but that may give us too many
64  // false positives. Moreover, the current ICU word breaker gives us
65  // back every single Chinese character as a word so that there's no
66  // point doing anything for them and we only adjust the minimum length
67  // to 2 for Korean Hangul while using 3 for others. This is a temporary
68  // hack until we have a segmentation support.
69  static bool IsWordLongEnoughForPrefixSearch(const string16& word);
70
71  // Parse a query into a SQLite query. The resulting query is placed in
72  // |sqlite_query| and the number of words is returned.
73  int ParseQuery(const string16& query, string16* sqlite_query);
74
75  // Parses |query|, returning the words that make up it. Any words in quotes
76  // are put in |words| without the quotes. For example, the query text
77  // "foo bar" results in two entries being added to words, one for foo and one
78  // for bar.
79  void ParseQueryWords(const string16& query, std::vector<string16>* words);
80
81  // Parses |query|, returning the nodes that constitute the valid words in the
82  // query. This is intended for later usage with DoesQueryMatch. Ownership of
83  // the nodes passes to the caller.
84  void ParseQueryNodes(const string16& query, std::vector<QueryNode*>* nodes);
85
86  // Returns true if the string text matches the query nodes created by a call
87  // to ParseQuery. If the query does match, each of the matching positions in
88  // the text is added to |match_positions|.
89  bool DoesQueryMatch(const string16& text,
90                      const std::vector<QueryNode*>& nodes,
91                      Snippet::MatchPositions* match_positions);
92
93  // Returns true if all of the |words| match the query |nodes| created by a
94  // call to ParseQuery.
95  bool DoesQueryMatch(const std::vector<QueryWord>& words,
96                      const std::vector<QueryNode*>& nodes);
97
98  // Extracts the words from |text|, placing each word into |words|.
99  void ExtractQueryWords(const string16& text, std::vector<QueryWord>* words);
100
101 private:
102  // Does the work of parsing |query|; creates nodes in |root| as appropriate.
103  // This is invoked from both of the ParseQuery methods.
104  bool ParseQueryImpl(const string16& query, QueryNodeList* root);
105
106  DISALLOW_COPY_AND_ASSIGN(QueryParser);
107};
108
109#endif  // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
110