1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// The query parser is used to parse queries entered into the history
6// search into more normalized queries that can be passed to the SQLite backend.
7
8#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
9#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
10#pragma once
11
12#include <vector>
13
14#include "base/string16.h"
15#include "chrome/browser/history/snippet.h"
16
17class QueryNodeList;
18
19// Used by HasMatchIn: a word together with where it starts in the text.
20struct QueryWord {
21  // The word to match against.
22  string16 word;
23
24  // The starting position of the word in the original text.
25  size_t position;
26};
27
28// QueryNode is used by QueryParser to represent the elements that
29// constitute a query. While QueryNode is exposed by way of ParseQuery, it
30// really isn't meant for external usage.
31class QueryNode {
32 public:
33  virtual ~QueryNode() {}
34
35  // Serialize ourselves out to a string that can be passed to SQLite. Returns
36  // the number of words in this node.
37  virtual int AppendToSQLiteQuery(string16* query) const = 0;
38
39  // Return true if this is a word node, false if it's a QueryNodeList.
40  virtual bool IsWord() const = 0;
41
42  // Returns true if this node matches the specified text. If |exact| is true,
43  // the string must exactly match. Otherwise, this uses a starts-with (prefix)
44  // comparison.
45  virtual bool Matches(const string16& word, bool exact) const = 0;
46
47  // Returns true if this node matches at least one of the words in |words|. If
48  // the node matches at least one word, an entry is added to |match_positions|
49  // giving the matching region.
50  virtual bool HasMatchIn(const std::vector<QueryWord>& words,
51                          Snippet::MatchPositions* match_positions) const = 0;
52
53  // Appends the words that make up this node to |words|.
54  virtual void AppendWords(std::vector<string16>* words) const = 0;
55};
56
57
// QueryParser parses history search text into SQLite queries or QueryNodes,
// and matches text against previously parsed nodes.
58class QueryParser {
59 public:
60  QueryParser();
61
  // Returns true if |word| is long enough to be used in a prefix search.
  //
62  // For CJK ideographs and Korean Hangul, even a single character
63  // can be useful in prefix matching, but that may give us too many
64  // false positives. Moreover, the current ICU word breaker gives us
65  // back every single Chinese character as a word so that there's no
66  // point doing anything for them and we only adjust the minimum length
67  // to 2 for Korean Hangul while using 3 for others. This is a temporary
68  // hack until we have segmentation support.
69  static bool IsWordLongEnoughForPrefixSearch(const string16& word);
70
71  // Parse a query into a SQLite query. The resulting query is placed in
72  // |sqlite_query| and the number of words is returned.
73  int ParseQuery(const string16& query,
74                 string16* sqlite_query);
75
76  // Parses the query words in |query|, returning the nodes that constitute the
77  // valid words in the query. This is intended for later usage with
78  // DoesQueryMatch.
79  // Ownership of the nodes passes to the caller.
80  void ParseQuery(const string16& query,
81                  std::vector<QueryNode*>* nodes);
82
83  // Parses a query returning the words that make up the query. Any words in
84  // quotes are put in |words| without the quotes. For example, the query text
85  // "foo bar" results in two entries being added to words, one for foo and one
86  // for bar.
87  void ExtractQueryWords(const string16& query,
88                         std::vector<string16>* words);
89
90  // Returns true if the string text matches the query nodes created by a call
91  // to ParseQuery. If the query does match, each of the matching positions in
92  // the text is added to |match_positions|.
93  bool DoesQueryMatch(const string16& text,
94                      const std::vector<QueryNode*>& nodes,
95                      Snippet::MatchPositions* match_positions);
96
97 private:
98  // Does the work of parsing a query; creates nodes in |root| as
99  // appropriate. This is invoked from both of the ParseQuery methods.
  // NOTE(review): the meaning of the bool return value is not documented
  // here; confirm against the implementation.
100  bool ParseQueryImpl(const string16& query,
101                      QueryNodeList* root);
102
103  // Extracts the words from |text|, placing each word into |words|.
104  void ExtractQueryWords(const string16& text,
105                         std::vector<QueryWord>* words);
106};
107
108#endif  // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
109