1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "app/sql/connection.h"
6#include "base/file_path.h"
7#include "base/file_util.h"
8#include "base/message_loop.h"
9#include "base/utf_string_conversions.h"
10#include "chrome/browser/history/text_database_manager.h"
11#include "chrome/browser/history/visit_database.h"
12#include "testing/gtest/include/gtest/gtest.h"
13
14using base::Time;
15using base::TimeDelta;
16using base::TimeTicks;
17
18namespace history {
19
20namespace {
21
// Fixture data for the five test pages: a URL, a title and a body for each.
// Every body contains the word "FOO" so that a single query can match all of
// the pages. Both the pointers and the characters they point to are constant
// so that no test can accidentally redirect one of them.
const char* const kURL1 = "http://www.google.com/asdf";
const char* const kTitle1 = "Google A";
const char* const kBody1 = "FOO page one.";

const char* const kURL2 = "http://www.google.com/qwer";
const char* const kTitle2 = "Google B";
const char* const kBody2 = "FOO two.";

const char* const kURL3 = "http://www.google.com/zxcv";
const char* const kTitle3 = "Google C";
const char* const kBody3 = "FOO drei";

const char* const kURL4 = "http://www.google.com/hjkl";
const char* const kTitle4 = "Google D";
const char* const kBody4 = "FOO lalala four.";

// Note that the fifth page's body text is identical to the first page's.
const char* const kURL5 = "http://www.google.com/uiop";
const char* const kTitle5 = "Google cinq";
const char* const kBody5 = "FOO page one.";
41
42// This provides a simple implementation of a URL+VisitDatabase using an
43// in-memory sqlite connection. The text database manager expects to be able to
44// update the visit database to keep in sync.
45class InMemDB : public URLDatabase, public VisitDatabase {
46 public:
47  InMemDB() {
48    EXPECT_TRUE(db_.OpenInMemory());
49    CreateURLTable(false);
50    InitVisitTable();
51  }
52  ~InMemDB() {
53  }
54
55 private:
56  virtual sql::Connection& GetDB() { return db_; }
57
58  sql::Connection db_;
59
60  DISALLOW_COPY_AND_ASSIGN(InMemDB);
61};
62
63// Adds all the pages once, and the first page once more in the next month.
64// The times of all the pages will be filled into |*times|.
65void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db,
66                 std::vector<Time>* times) {
67  Time::Exploded exploded;
68  memset(&exploded, 0, sizeof(Time::Exploded));
69
70  // Put the visits in two different months so it will query across databases.
71  exploded.year = 2008;
72  exploded.month = 1;
73  exploded.day_of_month = 3;
74
75  VisitRow visit_row;
76  visit_row.url_id = 1;
77  visit_row.visit_time = Time::FromUTCExploded(exploded);
78  visit_row.referring_visit = 0;
79  visit_row.transition = 0;
80  visit_row.segment_id = 0;
81  visit_row.is_indexed = false;
82  VisitID visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
83
84  times->push_back(visit_row.visit_time);
85  manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
86                      visit_row.visit_time, UTF8ToUTF16(kTitle1),
87                      UTF8ToUTF16(kBody1));
88
89  exploded.day_of_month++;
90  visit_row.url_id = 2;
91  visit_row.visit_time = Time::FromUTCExploded(exploded);
92  visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
93  times->push_back(visit_row.visit_time);
94  manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id,
95                      visit_row.visit_time, UTF8ToUTF16(kTitle2),
96                      UTF8ToUTF16(kBody2));
97
98  exploded.day_of_month++;
99  visit_row.url_id = 2;
100  visit_row.visit_time = Time::FromUTCExploded(exploded);
101  visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
102  times->push_back(visit_row.visit_time);
103  manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id,
104                      visit_row.visit_time, UTF8ToUTF16(kTitle3),
105                      UTF8ToUTF16(kBody3));
106
107  // Put the next ones in the next month.
108  exploded.month++;
109  visit_row.url_id = 2;
110  visit_row.visit_time = Time::FromUTCExploded(exploded);
111  visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
112  times->push_back(visit_row.visit_time);
113  manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id,
114                      visit_row.visit_time, UTF8ToUTF16(kTitle4),
115                      UTF8ToUTF16(kBody4));
116
117  exploded.day_of_month++;
118  visit_row.url_id = 2;
119  visit_row.visit_time = Time::FromUTCExploded(exploded);
120  visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
121  times->push_back(visit_row.visit_time);
122  manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id,
123                      visit_row.visit_time, UTF8ToUTF16(kTitle5),
124                      UTF8ToUTF16(kBody5));
125
126  // Put the first one in again in the second month.
127  exploded.day_of_month++;
128  visit_row.url_id = 2;
129  visit_row.visit_time = Time::FromUTCExploded(exploded);
130  visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
131  times->push_back(visit_row.visit_time);
132  manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
133                      visit_row.visit_time, UTF8ToUTF16(kTitle1),
134                      UTF8ToUTF16(kBody1));
135}
136
137bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
138                    const char* url) {
139  GURL gurl(url);
140  for (size_t i = 0; i < results.size(); i++) {
141    if (results[i].url == gurl)
142      return true;
143  }
144  return false;
145}
146
147}  // namespace
148
149class TextDatabaseManagerTest : public testing::Test {
150 public:
151  // Called manually by the test so it can report failure to initialize.
152  bool Init() {
153    return file_util::CreateNewTempDirectory(
154        FILE_PATH_LITERAL("TestSearchTest"), &dir_);
155  }
156
157 protected:
158  void SetUp() {
159  }
160
161  void TearDown() {
162    file_util::Delete(dir_, true);
163  }
164
165  MessageLoop message_loop_;
166
167  // Directory containing the databases.
168  FilePath dir_;
169};
170
171// Tests basic querying.
172TEST_F(TextDatabaseManagerTest, InsertQuery) {
173  ASSERT_TRUE(Init());
174  InMemDB visit_db;
175  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
176  ASSERT_TRUE(manager.Init(NULL));
177
178  std::vector<Time> times;
179  AddAllPages(manager, &visit_db, &times);
180
181  QueryOptions options;
182  options.begin_time = times[0] - TimeDelta::FromDays(100);
183  options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
184  std::vector<TextDatabase::Match> results;
185  Time first_time_searched;
186  manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
187                         &results, &first_time_searched);
188
189  // We should have matched every page.
190  EXPECT_EQ(6U, results.size());
191  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
192  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
193  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
194  EXPECT_TRUE(ResultsHaveURL(results, kURL4));
195  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
196
197  // The first time searched should have been the first page's time or before
198  // (it could have eliminated some time for us).
199  EXPECT_TRUE(first_time_searched <= times[0]);
200}
201
202// Tests that adding page components piecemeal will get them added properly.
203// This does not supply a visit to update, this mode is used only by the unit
204// tests right now, but we test it anyway.
205TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
206  ASSERT_TRUE(Init());
207  InMemDB visit_db;
208  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
209  ASSERT_TRUE(manager.Init(NULL));
210
211  // First add one without a visit.
212  const GURL url(kURL1);
213  manager.AddPageURL(url, 0, 0, Time::Now());
214  manager.AddPageTitle(url, UTF8ToUTF16(kTitle1));
215  manager.AddPageContents(url, UTF8ToUTF16(kBody1));
216
217  // Check that the page got added.
218  QueryOptions options;
219  std::vector<TextDatabase::Match> results;
220  Time first_time_searched;
221
222  manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
223                         &results, &first_time_searched);
224  ASSERT_EQ(1U, results.size());
225  EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title));
226}
227
228// Like InsertCompleteNoVisit but specifies a visit to update. We check that the
229// visit was updated properly.
230TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
231  ASSERT_TRUE(Init());
232  InMemDB visit_db;
233  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
234  ASSERT_TRUE(manager.Init(NULL));
235
236  // First add a visit to a page. We can just make up a URL ID since there is
237  // not actually any URL database around.
238  VisitRow visit;
239  visit.url_id = 1;
240  visit.visit_time = Time::Now();
241  visit.referring_visit = 0;
242  visit.transition = PageTransition::LINK;
243  visit.segment_id = 0;
244  visit.is_indexed = false;
245  visit_db.AddVisit(&visit, SOURCE_BROWSED);
246
247  // Add a full text indexed entry for that visit.
248  const GURL url(kURL2);
249  manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time);
250  manager.AddPageContents(url, UTF8ToUTF16(kBody2));
251  manager.AddPageTitle(url, UTF8ToUTF16(kTitle2));
252
253  // Check that the page got added.
254  QueryOptions options;
255  std::vector<TextDatabase::Match> results;
256  Time first_time_searched;
257
258  manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
259                         &results, &first_time_searched);
260  ASSERT_EQ(1U, results.size());
261  EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title));
262
263  // Check that the visit got updated for its new indexed state.
264  VisitRow out_visit;
265  ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit));
266  EXPECT_TRUE(out_visit.is_indexed);
267}
268
269// Tests that partial inserts that expire are added to the database.
270TEST_F(TextDatabaseManagerTest, InsertPartial) {
271  ASSERT_TRUE(Init());
272  InMemDB visit_db;
273  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
274  ASSERT_TRUE(manager.Init(NULL));
275
276  // Add the first one with just a URL.
277  GURL url1(kURL1);
278  manager.AddPageURL(url1, 0, 0, Time::Now());
279
280  // Now add a second one with a URL and title.
281  GURL url2(kURL2);
282  manager.AddPageURL(url2, 0, 0, Time::Now());
283  manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2));
284
285  // The third one has a URL and body.
286  GURL url3(kURL3);
287  manager.AddPageURL(url3, 0, 0, Time::Now());
288  manager.AddPageContents(url3, UTF8ToUTF16(kBody3));
289
290  // Expire stuff very fast. This assumes that the time between the first
291  // AddPageURL and this line is less than the expiration time (20 seconds).
292  TimeTicks added_time = TimeTicks::Now();
293  TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5);
294  manager.FlushOldChangesForTime(expire_time);
295
296  // Do a query, nothing should be added yet.
297  QueryOptions options;
298  std::vector<TextDatabase::Match> results;
299  Time first_time_searched;
300  manager.GetTextMatches(UTF8ToUTF16("google"), options,
301                         &results, &first_time_searched);
302  ASSERT_EQ(0U, results.size());
303
304  // Compute a time threshold that will cause everything to be flushed, and
305  // poke at the manager's internals to cause this to happen.
306  expire_time = added_time + TimeDelta::FromDays(1);
307  manager.FlushOldChangesForTime(expire_time);
308
309  // Now we should have all 3 URLs added.
310  manager.GetTextMatches(UTF8ToUTF16("google"), options,
311                         &results, &first_time_searched);
312  ASSERT_EQ(3U, results.size());
313  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
314  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
315  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
316}
317
318// Tests that partial inserts (due to timeouts) will still get updated if the
319// data comes in later.
320TEST_F(TextDatabaseManagerTest, PartialComplete) {
321  ASSERT_TRUE(Init());
322  InMemDB visit_db;
323  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
324  ASSERT_TRUE(manager.Init(NULL));
325
326  Time added_time = Time::Now();
327  GURL url(kURL1);
328
329  // We have to have the URL in the URL and visit databases for this test to
330  // work.
331  URLRow url_row(url);
332  url_row.set_title(UTF8ToUTF16("chocolate"));
333  URLID url_id = visit_db.AddURL(url_row);
334  ASSERT_TRUE(url_id);
335  VisitRow visit_row;
336  visit_row.url_id = url_id;
337  visit_row.visit_time = added_time;
338  visit_db.AddVisit(&visit_row, SOURCE_BROWSED);
339
340  // Add a URL with no title or body, and say that it expired.
341  manager.AddPageURL(url, 0, 0, added_time);
342  TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
343  manager.FlushOldChangesForTime(expire_time);
344
345  // Add the title. We should be able to query based on that. The title in the
346  // URL row we set above should not come into the picture.
347  manager.AddPageTitle(url, UTF8ToUTF16("Some unique title"));
348  Time first_time_searched;
349  QueryOptions options;
350  std::vector<TextDatabase::Match> results;
351  manager.GetTextMatches(UTF8ToUTF16("unique"), options,
352                         &results, &first_time_searched);
353  EXPECT_EQ(1U, results.size());
354  manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
355                         &results, &first_time_searched);
356  EXPECT_EQ(0U, results.size());
357
358  // Now add the body, which should be queryable.
359  manager.AddPageContents(url, UTF8ToUTF16("Very awesome body"));
360  manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched);
361  EXPECT_EQ(1U, results.size());
362
363  // Adding the body will actually copy the title from the URL table rather
364  // than the previously indexed row (we made them not match above). This isn't
365  // necessarily what we want, but it's how it's implemented, and we don't want
366  // to regress it.
367  manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched);
368  EXPECT_EQ(1U, results.size());
369}
370
371// Tests that changes get properly committed to disk.
372TEST_F(TextDatabaseManagerTest, Writing) {
373  ASSERT_TRUE(Init());
374
375  QueryOptions options;
376  std::vector<TextDatabase::Match> results;
377  Time first_time_searched;
378
379  InMemDB visit_db;
380
381  // Create the manager and write some stuff to it.
382  {
383    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
384    ASSERT_TRUE(manager.Init(NULL));
385
386    std::vector<Time> times;
387    AddAllPages(manager, &visit_db, &times);
388
389    // We should have matched every page.
390    manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched);
391    EXPECT_EQ(6U, results.size());
392  }
393  results.clear();
394
395  // Recreate the manager and make sure it finds the written stuff.
396  {
397    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
398    ASSERT_TRUE(manager.Init(NULL));
399
400    // We should have matched every page again.
401    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
402                           &results, &first_time_searched);
403    EXPECT_EQ(6U, results.size());
404  }
405}
406
407// Tests that changes get properly committed to disk, as in the Writing test
408// above, but when there is a transaction around the adds.
409TEST_F(TextDatabaseManagerTest, WritingTransaction) {
410  ASSERT_TRUE(Init());
411
412  QueryOptions options;
413  std::vector<TextDatabase::Match> results;
414  Time first_time_searched;
415
416  InMemDB visit_db;
417
418  // Create the manager and write some stuff to it.
419  {
420    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
421    ASSERT_TRUE(manager.Init(NULL));
422
423    std::vector<Time> times;
424    manager.BeginTransaction();
425    AddAllPages(manager, &visit_db, &times);
426    // "Forget" to commit, it should be autocommittedd for us.
427
428    // We should have matched every page.
429    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
430                           &results, &first_time_searched);
431    EXPECT_EQ(6U, results.size());
432  }
433  results.clear();
434
435  // Recreate the manager and make sure it finds the written stuff.
436  {
437    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
438    ASSERT_TRUE(manager.Init(NULL));
439
440    // We should have matched every page again.
441    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
442                           &results, &first_time_searched);
443    EXPECT_EQ(6U, results.size());
444  }
445}
446
447// Tests querying where the maximum number of items is met.
448TEST_F(TextDatabaseManagerTest, QueryMax) {
449  ASSERT_TRUE(Init());
450  InMemDB visit_db;
451  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
452  ASSERT_TRUE(manager.Init(NULL));
453
454  std::vector<Time> times;
455  AddAllPages(manager, &visit_db, &times);
456
457  string16 foo = UTF8ToUTF16("FOO");
458
459  QueryOptions options;
460  options.begin_time = times[0] - TimeDelta::FromDays(100);
461  options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
462  options.max_count = 2;
463  std::vector<TextDatabase::Match> results;
464  Time first_time_searched;
465  manager.GetTextMatches(foo, options, &results, &first_time_searched);
466
467  // We should have gotten the last two pages as results (the first page is
468  // also the last).
469  EXPECT_EQ(2U, results.size());
470  EXPECT_TRUE(first_time_searched <= times[4]);
471  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
472  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
473
474  // Asking for 4 pages, the first one should be in another DB.
475  options.max_count = 4;
476  manager.GetTextMatches(foo, options, &results, &first_time_searched);
477
478  EXPECT_EQ(4U, results.size());
479  EXPECT_TRUE(first_time_searched <= times[4]);
480  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
481  EXPECT_TRUE(ResultsHaveURL(results, kURL4));
482  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
483  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
484}
485
486// Tests querying backwards in time in chunks.
487TEST_F(TextDatabaseManagerTest, QueryBackwards) {
488  ASSERT_TRUE(Init());
489  InMemDB visit_db;
490  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
491  ASSERT_TRUE(manager.Init(NULL));
492
493  std::vector<Time> times;
494  AddAllPages(manager, &visit_db, &times);
495
496  string16 foo = UTF8ToUTF16("FOO");
497
498  // First do a query for all time, but with a max of 2. This will give us the
499  // last two results and will tell us where to start searching when we want
500  // to go back in time.
501  QueryOptions options;
502  options.begin_time = times[0] - TimeDelta::FromDays(100);
503  options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
504  options.max_count = 2;
505  std::vector<TextDatabase::Match> results;
506  Time first_time_searched;
507  manager.GetTextMatches(foo, options, &results, &first_time_searched);
508
509  // Check that we got the last two results.
510  EXPECT_EQ(2U, results.size());
511  EXPECT_TRUE(first_time_searched <= times[4]);
512  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
513  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
514
515  // Query the previous two URLs and make sure we got the correct ones.
516  options.end_time = first_time_searched;
517  manager.GetTextMatches(foo, options, &results, &first_time_searched);
518  EXPECT_EQ(2U, results.size());
519  EXPECT_TRUE(first_time_searched <= times[2]);
520  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
521  EXPECT_TRUE(ResultsHaveURL(results, kURL4));
522
523  // Query the previous two URLs...
524  options.end_time = first_time_searched;
525  manager.GetTextMatches(foo, options, &results, &first_time_searched);
526  EXPECT_EQ(2U, results.size());
527  EXPECT_TRUE(first_time_searched <= times[0]);
528  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
529  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
530
531  // Try to query some more, there should be no results.
532  options.end_time = first_time_searched;
533  manager.GetTextMatches(foo, options, &results, &first_time_searched);
534  EXPECT_EQ(0U, results.size());
535}
536
537}  // namespace history
538