1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/basictypes.h" 6#include "base/memory/scoped_vector.h" 7#include "base/utf_string_conversions.h" 8#include "chrome/browser/history/query_parser.h" 9#include "testing/gtest/include/gtest/gtest.h" 10 11class QueryParserTest : public testing::Test { 12 public: 13 struct TestData { 14 const char* input; 15 const int expected_word_count; 16 }; 17 18 std::string QueryToString(const std::string& query); 19 20 protected: 21 QueryParser query_parser_; 22}; 23 24// Test helper: Convert a user query string in 8-bit (for hardcoding 25// convenience) to a SQLite query string. 26std::string QueryParserTest::QueryToString(const std::string& query) { 27 string16 sqlite_query; 28 query_parser_.ParseQuery(UTF8ToUTF16(query), &sqlite_query); 29 return UTF16ToUTF8(sqlite_query); 30} 31 32// Basic multi-word queries, including prefix matching. 33TEST_F(QueryParserTest, SimpleQueries) { 34 EXPECT_EQ("", QueryToString(" ")); 35 EXPECT_EQ("singleword*", QueryToString("singleword")); 36 EXPECT_EQ("spacedout*", QueryToString(" spacedout ")); 37 EXPECT_EQ("foo* bar*", QueryToString("foo bar")); 38 // Short words aren't prefix matches. For Korean Hangul 39 // the minimum is 2 while for other scripts, it's 3. 40 EXPECT_EQ("f b", QueryToString(" f b")); 41 // KA JANG 42 EXPECT_EQ(WideToUTF8(L"\xAC00 \xC7A5"), 43 QueryToString(WideToUTF8(L" \xAC00 \xC7A5"))); 44 EXPECT_EQ("foo* bar*", QueryToString(" foo bar ")); 45 // KA-JANG BICH-GO 46 EXPECT_EQ(WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"), 47 QueryToString(WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0"))); 48} 49 50// Quoted substring parsing. 51TEST_F(QueryParserTest, Quoted) { 52 // ASCII quotes 53 EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\"")); 54 // Missing end quotes 55 EXPECT_EQ("\"miss end\"", QueryToString("\"miss end")); 56 // Missing begin quotes 57 EXPECT_EQ("miss* beg*", QueryToString("miss beg\"")); 58 // Weird formatting 59 EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes")); 60} 61 62// Apostrophes within words should be preserved, but otherwise stripped. 63TEST_F(QueryParserTest, Apostrophes) { 64 EXPECT_EQ("foo* bar's*", QueryToString("foo bar's")); 65 EXPECT_EQ("l'foo*", QueryToString("l'foo")); 66 EXPECT_EQ("foo*", QueryToString("'foo")); 67} 68 69// Special characters. 70TEST_F(QueryParserTest, SpecialChars) { 71 EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar")); 72} 73 74TEST_F(QueryParserTest, NumWords) { 75 TestData data[] = { 76 { "blah", 1 }, 77 { "foo \"bar baz\"", 3 }, 78 { "foo \"baz\"", 2 }, 79 { "foo \"bar baz\" blah", 4 }, 80 }; 81 82 for (size_t i = 0; i < arraysize(data); ++i) { 83 string16 query_string; 84 EXPECT_EQ(data[i].expected_word_count, 85 query_parser_.ParseQuery(UTF8ToUTF16(data[i].input), 86 &query_string)); 87 } 88} 89 90TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { 91 struct TestData2 { 92 const std::string query; 93 const std::string text; 94 const bool matches; 95 const size_t m1_start; 96 const size_t m1_end; 97 const size_t m2_start; 98 const size_t m2_end; 99 } data[] = { 100 { "foo foo", "foo", true, 0, 3, 0, 0 }, 101 { "foo fooey", "fooey", true, 0, 5, 0, 0 }, 102 { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 }, 103 { "blah", "blah", true, 0, 4, 0, 0 }, 104 { "blah", "foo", false, 0, 0, 0, 0 }, 105 { "blah", "blahblah", true, 0, 4, 0, 0 }, 106 { "blah", "foo blah", true, 4, 8, 0, 0 }, 107 { "foo blah", "blah", false, 0, 0, 0, 0 }, 108 { "foo blah", "blahx foobar", true, 0, 4, 6, 9 }, 109 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, 110 { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 }, 111 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, 112 { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 }, 113 { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 }, 114 }; 115 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { 116 QueryParser parser; 117 ScopedVector<QueryNode> query_nodes; 118 parser.ParseQuery(UTF8ToUTF16(data[i].query), &query_nodes.get()); 119 Snippet::MatchPositions match_positions; 120 ASSERT_EQ(data[i].matches, 121 parser.DoesQueryMatch(UTF8ToUTF16(data[i].text), 122 query_nodes.get(), 123 &match_positions)); 124 size_t offset = 0; 125 if (data[i].m1_start != 0 || data[i].m1_end != 0) { 126 ASSERT_TRUE(match_positions.size() >= 1); 127 EXPECT_EQ(data[i].m1_start, match_positions[0].first); 128 EXPECT_EQ(data[i].m1_end, match_positions[0].second); 129 offset++; 130 } 131 if (data[i].m2_start != 0 || data[i].m2_end != 0) { 132 ASSERT_TRUE(match_positions.size() == 1 + offset); 133 EXPECT_EQ(data[i].m2_start, match_positions[offset].first); 134 EXPECT_EQ(data[i].m2_end, match_positions[offset].second); 135 } 136 } 137} 138 139TEST_F(QueryParserTest, ExtractQueryWords) { 140 struct TestData2 { 141 const std::string text; 142 const std::string w1; 143 const std::string w2; 144 const std::string w3; 145 const size_t word_count; 146 } data[] = { 147 { "foo", "foo", "", "", 1 }, 148 { "foo bar", "foo", "bar", "", 2 }, 149 { "\"foo bar\"", "foo", "bar", "", 2 }, 150 { "\"foo bar\" a", "foo", "bar", "a", 3 }, 151 }; 152 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { 153 std::vector<string16> results; 154 QueryParser parser; 155 parser.ExtractQueryWords(UTF8ToUTF16(data[i].text), &results); 156 ASSERT_EQ(data[i].word_count, results.size()); 157 EXPECT_EQ(data[i].w1, UTF16ToUTF8(results[0])); 158 if (results.size() == 2) 159 EXPECT_EQ(data[i].w2, UTF16ToUTF8(results[1])); 160 if (results.size() == 3) 161 EXPECT_EQ(data[i].w3, UTF16ToUTF8(results[2])); 162 } 163} 164