1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/basictypes.h"
6#include "base/memory/scoped_vector.h"
7#include "base/strings/utf_string_conversions.h"
8#include "components/query_parser/query_parser.h"
9#include "testing/gtest/include/gtest/gtest.h"
10
11namespace query_parser {
12
13class QueryParserTest : public testing::Test {
14 public:
15  struct TestData {
16    const char* input;
17    const int expected_word_count;
18  };
19
20  std::string QueryToString(const std::string& query);
21
22 protected:
23  QueryParser query_parser_;
24};
25
26// Test helper: Convert a user query string in 8-bit (for hardcoding
27// convenience) to a SQLite query string.
28std::string QueryParserTest::QueryToString(const std::string& query) {
29  base::string16 sqlite_query;
30  query_parser_.ParseQuery(base::UTF8ToUTF16(query), &sqlite_query);
31  return base::UTF16ToUTF8(sqlite_query);
32}
33
34// Basic multi-word queries, including prefix matching.
35TEST_F(QueryParserTest, SimpleQueries) {
36  EXPECT_EQ("", QueryToString(" "));
37  EXPECT_EQ("singleword*", QueryToString("singleword"));
38  EXPECT_EQ("spacedout*", QueryToString("  spacedout "));
39  EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
40  // Short words aren't prefix matches. For Korean Hangul
41  // the minimum is 2 while for other scripts, it's 3.
42  EXPECT_EQ("f b", QueryToString(" f b"));
43  // KA JANG
44  EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"),
45            QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5")));
46  EXPECT_EQ("foo* bar*", QueryToString(" foo   bar "));
47  // KA-JANG BICH-GO
48  EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"),
49            QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0")));
50}
51
52// Quoted substring parsing.
53TEST_F(QueryParserTest, Quoted) {
54  // ASCII quotes
55  EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
56  // Missing end quotes
57  EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
58  // Missing begin quotes
59  EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
60  // Weird formatting
61  EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many   \"\"quotes"));
62}
63
64// Apostrophes within words should be preserved, but otherwise stripped.
65TEST_F(QueryParserTest, Apostrophes) {
66  EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
67  EXPECT_EQ("l'foo*", QueryToString("l'foo"));
68  EXPECT_EQ("foo*", QueryToString("'foo"));
69}
70
71// Special characters.
72TEST_F(QueryParserTest, SpecialChars) {
73  EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
74}
75
76TEST_F(QueryParserTest, NumWords) {
77  TestData data[] = {
78    { "blah",                  1 },
79    { "foo \"bar baz\"",       3 },
80    { "foo \"baz\"",           2 },
81    { "foo \"bar baz\"  blah", 4 },
82  };
83
84  for (size_t i = 0; i < arraysize(data); ++i) {
85    base::string16 query_string;
86    EXPECT_EQ(data[i].expected_word_count,
87              query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input),
88                                       &query_string));
89  }
90}
91
92TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
93  struct TestData2 {
94    const std::string query;
95    const std::string text;
96    const bool matches;
97    const size_t m1_start;
98    const size_t m1_end;
99    const size_t m2_start;
100    const size_t m2_end;
101  } data[] = {
102    { "foo",           "fooey foo",        true,  0, 3, 6, 9 },
103    { "foo foo",       "foo",              true,  0, 3, 0, 0 },
104    { "foo fooey",     "fooey",            true,  0, 5, 0, 0 },
105    { "fooey foo",     "fooey",            true,  0, 5, 0, 0 },
106    { "foo fooey bar", "bar fooey",        true,  0, 3, 4, 9 },
107    { "blah",          "blah",             true,  0, 4, 0, 0 },
108    { "blah",          "foo",              false, 0, 0, 0, 0 },
109    { "blah",          "blahblah",         true,  0, 4, 0, 0 },
110    { "blah",          "foo blah",         true,  4, 8, 0, 0 },
111    { "foo blah",      "blah",             false, 0, 0, 0, 0 },
112    { "foo blah",      "blahx foobar",     true,  0, 4, 6, 9 },
113    { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
114    { "\"foo blah\"",  "foox blahx",       false, 0, 0, 0, 0 },
115    { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
116    { "\"foo blah\"",  "\"foo blah\"",     true,  1, 9, 0, 0 },
117    { "foo blah",      "\"foo bar blah\"", true,  1, 4, 9, 13 },
118  };
119  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
120    QueryParser parser;
121    ScopedVector<QueryNode> query_nodes;
122    parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query),
123                           &query_nodes.get());
124    Snippet::MatchPositions match_positions;
125    ASSERT_EQ(data[i].matches,
126              parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text),
127                                    query_nodes.get(),
128                                    &match_positions));
129    size_t offset = 0;
130    if (data[i].m1_start != 0 || data[i].m1_end != 0) {
131      ASSERT_TRUE(match_positions.size() >= 1);
132      EXPECT_EQ(data[i].m1_start, match_positions[0].first);
133      EXPECT_EQ(data[i].m1_end, match_positions[0].second);
134      offset++;
135    }
136    if (data[i].m2_start != 0 || data[i].m2_end != 0) {
137      ASSERT_TRUE(match_positions.size() == 1 + offset);
138      EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
139      EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
140    }
141  }
142}
143
144TEST_F(QueryParserTest, ParseQueryWords) {
145  struct TestData2 {
146    const std::string text;
147    const std::string w1;
148    const std::string w2;
149    const std::string w3;
150    const size_t word_count;
151  } data[] = {
152    { "foo",           "foo", "",    "",  1 },
153    { "foo bar",       "foo", "bar", "",  2 },
154    { "\"foo bar\"",   "foo", "bar", "",  2 },
155    { "\"foo bar\" a", "foo", "bar", "a", 3 },
156  };
157  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
158    std::vector<base::string16> results;
159    QueryParser parser;
160    parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text), &results);
161    ASSERT_EQ(data[i].word_count, results.size());
162    EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0]));
163    if (results.size() == 2)
164      EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1]));
165    if (results.size() == 3)
166      EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2]));
167  }
168}
169
170}  // namespace query_parser
171