history_types.h revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
6#define CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
7
8#include <map>
9#include <set>
10#include <string>
11#include <vector>
12
13#include "base/basictypes.h"
14#include "base/stack_container.h"
15#include "base/string16.h"
16#include "base/time.h"
17#include "chrome/browser/history/snippet.h"
18#include "chrome/common/page_transition_types.h"
19#include "chrome/common/ref_counted_util.h"
20#include "googleurl/src/gurl.h"
21
22namespace history {
23
24// Forward declaration for friend statements.
25class HistoryBackend;
26class URLDatabase;
27
28// Structure to hold redirect lists for URLs.  For a redirect chain
29// A -> B -> C, and entry in the map would look like "A => {B -> C}".
30typedef std::map<GURL, scoped_refptr<RefCountedVector<GURL> > > RedirectMap;
31
32// Container for a list of URLs.
33typedef std::vector<GURL> RedirectList;
34
35typedef int64 StarID;  // Unique identifier for star entries.
36typedef int64 UIStarID;  // Identifier for star entries that come from the UI.
37typedef int64 DownloadID;   // Identifier for a download.
38typedef int64 FavIconID;  // For FavIcons.
39typedef int64 SegmentID;  // URL segments for the most visited view.
40
41// URLRow ---------------------------------------------------------------------
42
43typedef int64 URLID;
44
45// Holds all information globally associated with one URL (one row in the
46// URL table).
47//
48// This keeps track of dirty bits, which are currently unused:
49//
50// TODO(brettw) the dirty bits are broken in a number of respects. First, the
51// database will want to update them on a const object, so they need to be
52// mutable.
53//
54// Second, there is a problem copying. If you make a copy of this structure
55// (as we allow since we put this into vectors in various places) then the
56// dirty bits will not be in sync for these copies.
57class URLRow {
58 public:
59  URLRow() {
60    Initialize();
61  }
62
63  explicit URLRow(const GURL& url) : url_(url) {
64    // Initialize will not set the URL, so our initialization above will stay.
65    Initialize();
66  }
67
68  // We need to be able to set the id of a URLRow that's being passed through
69  // an IPC message.  This constructor should probably not be used otherwise.
70  URLRow(const GURL& url, URLID id) : url_(url) {
71    // Initialize will not set the URL, so our initialization above will stay.
72    Initialize();
73    // Initialize will zero the id_, so set it here.
74    id_ = id;
75  }
76
77  virtual ~URLRow() {}
78
79  URLID id() const { return id_; }
80  const GURL& url() const { return url_; }
81
82  const string16& title() const {
83    return title_;
84  }
85  void set_title(const string16& title) {
86    // The title is frequently set to the same thing, so we don't bother
87    // updating unless the string has changed.
88    if (title != title_) {
89      title_ = title;
90    }
91  }
92
93  int visit_count() const {
94    return visit_count_;
95  }
96  void set_visit_count(int visit_count) {
97    visit_count_ = visit_count;
98  }
99
100  // Number of times the URL was typed in the Omnibox.
101  int typed_count() const {
102    return typed_count_;
103  }
104  void set_typed_count(int typed_count) {
105    typed_count_ = typed_count;
106  }
107
108  base::Time last_visit() const {
109    return last_visit_;
110  }
111  void set_last_visit(base::Time last_visit) {
112    last_visit_ = last_visit;
113  }
114
115  // If this is set, we won't autocomplete this URL.
116  bool hidden() const {
117    return hidden_;
118  }
119  void set_hidden(bool hidden) {
120    hidden_ = hidden;
121  }
122
123  // ID of the favicon. A value of 0 means the favicon isn't known yet.
124  FavIconID favicon_id() const { return favicon_id_; }
125  void set_favicon_id(FavIconID favicon_id) {
126    favicon_id_ = favicon_id;
127  }
128
129  // Swaps the contents of this URLRow with another, which allows it to be
130  // destructively copied without memory allocations.
131  // (Virtual because it's overridden by URLResult.)
132  virtual void Swap(URLRow* other);
133
134 private:
135  // This class writes directly into this structure and clears our dirty bits
136  // when reading out of the DB.
137  friend class URLDatabase;
138  friend class HistoryBackend;
139
140  // Initializes all values that need initialization to their defaults.
141  // This excludes objects which autoinitialize such as strings.
142  void Initialize();
143
144  // The row ID of this URL. Immutable except for the database which sets it
145  // when it pulls them out.
146  URLID id_;
147
148  // The URL of this row. Immutable except for the database which sets it
149  // when it pulls them out. If clients want to change it, they must use
150  // the constructor to make a new one.
151  GURL url_;
152
153  string16 title_;
154
155  // Total number of times this URL has been visited.
156  int visit_count_;
157
158  // Number of times this URL has been manually entered in the URL bar.
159  int typed_count_;
160
161  // The date of the last visit of this URL, which saves us from having to
162  // loop up in the visit table for things like autocomplete and expiration.
163  base::Time last_visit_;
164
165  // Indicates this entry should now be shown in typical UI or queries, this
166  // is usually for subframes.
167  bool hidden_;
168
169  // The ID of the favicon for this url.
170  FavIconID favicon_id_;
171
172  // We support the implicit copy constuctor and operator=.
173};
174
175// VisitRow -------------------------------------------------------------------
176
177typedef int64 VisitID;
178
179// Holds all information associated with a specific visit. A visit holds time
180// and referrer information for one time a URL is visited.
181class VisitRow {
182 public:
183  VisitRow();
184  VisitRow(URLID arg_url_id,
185           base::Time arg_visit_time,
186           VisitID arg_referring_visit,
187           PageTransition::Type arg_transition,
188           SegmentID arg_segment_id);
189
190  // ID of this row (visit ID, used a a referrer for other visits).
191  VisitID visit_id;
192
193  // Row ID into the URL table of the URL that this page is.
194  URLID url_id;
195
196  base::Time visit_time;
197
198  // Indicates another visit that was the referring page for this one.
199  // 0 indicates no referrer.
200  VisitID referring_visit;
201
202  // A combination of bits from PageTransition.
203  PageTransition::Type transition;
204
205  // The segment id (see visitsegment_database.*).
206  // If 0, the segment id is null in the table.
207  SegmentID segment_id;
208
209  // True when this visit has indexed data for it. We try to keep this in sync
210  // with the full text index: when we add or remove things from there, we will
211  // update the visit table as well. However, that file could get deleted, or
212  // out of sync in various ways, so this flag should be false when things
213  // change.
214  bool is_indexed;
215
216  // Compares two visits based on dates, for sorting.
217  bool operator<(const VisitRow& other) {
218    return visit_time < other.visit_time;
219  }
220
221  // We allow the implicit copy constuctor and operator=.
222};
223
224// We pass around vectors of visits a lot
225typedef std::vector<VisitRow> VisitVector;
226
227// Favicons -------------------------------------------------------------------
228
229// Used by the importer to set favicons for imported bookmarks.
230struct ImportedFavIconUsage {
231  // The URL of the favicon.
232  GURL favicon_url;
233
234  // The raw png-encoded data.
235  std::vector<unsigned char> png_data;
236
237  // The list of URLs using this favicon.
238  std::set<GURL> urls;
239};
240
241// PageVisit ------------------------------------------------------------------
242
243// Represents a simplified version of a visit for external users. Normally,
244// views are only interested in the time, and not the other information
245// associated with a VisitRow.
246struct PageVisit {
247  URLID page_id;
248  base::Time visit_time;
249};
250
251// StarredEntry ---------------------------------------------------------------
252
253// StarredEntry represents either a starred page, or a star grouping (where
254// a star grouping consists of child starred entries). Use the type to
255// determine the type of a particular entry.
256//
257// The database internally uses the id field to uniquely identify a starred
258// entry. On the other hand, the UI, which is anything routed through
259// HistoryService and HistoryBackend (including BookmarkBarView), uses the
260// url field to uniquely identify starred entries of type URL and the group_id
261// field to uniquely identify starred entries of type USER_GROUP. For example,
262// HistoryService::UpdateStarredEntry identifies the entry by url (if the
263// type is URL) or group_id (if the type is not URL).
264struct StarredEntry {
265  enum Type {
266    // Type represents a starred URL (StarredEntry).
267    URL,
268
269    // The bookmark bar grouping.
270    BOOKMARK_BAR,
271
272    // User created group.
273    USER_GROUP,
274
275    // The "other bookmarks" folder that holds uncategorized bookmarks.
276    OTHER
277  };
278
279  StarredEntry();
280
281  void Swap(StarredEntry* other);
282
283  // Unique identifier of this entry.
284  StarID id;
285
286  // Title.
287  string16 title;
288
289  // When this was added.
290  base::Time date_added;
291
292  // Group ID of the star group this entry is in. If 0, this entry is not
293  // in a star group.
294  UIStarID parent_group_id;
295
296  // Unique identifier for groups. This is assigned by the UI.
297  //
298  // WARNING: this is NOT the same as id, id is assigned by the database,
299  // this is assigned by the UI. See note about StarredEntry for more info.
300  UIStarID group_id;
301
302  // Visual order within the parent. Only valid if group_id is not 0.
303  int visual_order;
304
305  // Type of this entry (see enum).
306  Type type;
307
308  // If type == URL, this is the URL of the page that was starred.
309  GURL url;
310
311  // If type == URL, this is the ID of the URL of the primary page that was
312  // starred.
313  history::URLID url_id;
314
315  // Time the entry was last modified. This is only used for groups and
316  // indicates the last time a URL was added as a child to the group.
317  base::Time date_group_modified;
318};
319
320// URLResult -------------------------------------------------------------------
321
322class URLResult : public URLRow {
323 public:
324  URLResult() {}
325  URLResult(const GURL& url, base::Time visit_time)
326      : URLRow(url),
327        visit_time_(visit_time) {
328  }
329  // Constructor that create a URLResult from the specified URL and title match
330  // positions from title_matches.
331  URLResult(const GURL& url, const Snippet::MatchPositions& title_matches)
332      : URLRow(url) {
333    title_match_positions_ = title_matches;
334  }
335
336  base::Time visit_time() const { return visit_time_; }
337  void set_visit_time(base::Time visit_time) { visit_time_ = visit_time; }
338
339  const Snippet& snippet() const { return snippet_; }
340
341  // If this is a title match, title_match_positions contains an entry for
342  // every word in the title that matched one of the query parameters. Each
343  // entry contains the start and end of the match.
344  const Snippet::MatchPositions& title_match_positions() const {
345    return title_match_positions_;
346  }
347
348  virtual void Swap(URLResult* other);
349
350 private:
351  friend class HistoryBackend;
352
353  // The time that this result corresponds to.
354  base::Time visit_time_;
355
356  // These values are typically set by HistoryBackend.
357  Snippet snippet_;
358  Snippet::MatchPositions title_match_positions_;
359
360  // We support the implicit copy constructor and operator=.
361};
362
363// QueryResults ----------------------------------------------------------------
364
365// Encapsulates the results of a history query. It supports an ordered list of
366// URLResult objects, plus an efficient way of looking up the index of each time
367// a given URL appears in those results.
368class QueryResults {
369 public:
370  typedef std::vector<URLResult*> URLResultVector;
371
372  QueryResults();
373  ~QueryResults();
374
375  // Indicates the first time that the query includes results for (queries are
376  // clipped at the beginning, so it will always include to the end of the time
377  // queried).
378  //
379  // If the number of results was clipped as a result of the max count, this
380  // will be the time of the first query returned. If there were fewer results
381  // than we were allowed to return, this represents the first date considered
382  // in the query (this will be before the first result if there was time
383  // queried with no results).
384  //
385  // TODO(brettw): bug 1203054: This field is not currently set properly! Do
386  // not use until the bug is fixed.
387  base::Time first_time_searched() const { return first_time_searched_; }
388  void set_first_time_searched(base::Time t) { first_time_searched_ = t; }
389  // Note: If you need end_time_searched, it can be added.
390
391  void set_reached_beginning(bool reached) { reached_beginning_ = reached; }
392  bool reached_beginning() { return reached_beginning_; }
393
394  size_t size() const { return results_.size(); }
395  bool empty() const { return results_.empty(); }
396
397  URLResult& operator[](size_t i) { return *results_[i]; }
398  const URLResult& operator[](size_t i) const { return *results_[i]; }
399
400  URLResultVector::const_iterator begin() const { return results_.begin(); }
401  URLResultVector::const_iterator end() const { return results_.end(); }
402  URLResultVector::const_reverse_iterator rbegin() const {
403    return results_.rbegin();
404  }
405  URLResultVector::const_reverse_iterator rend() const {
406    return results_.rend();
407  }
408
409  // Returns a pointer to the beginning of an array of all matching indices
410  // for entries with the given URL. The array will be |*num_matches| long.
411  // |num_matches| can be NULL if the caller is not interested in the number of
412  // results (commonly it will only be interested in the first one and can test
413  // the pointer for NULL).
414  //
415  // When there is no match, it will return NULL and |*num_matches| will be 0.
416  const size_t* MatchesForURL(const GURL& url, size_t* num_matches) const;
417
418  // Swaps the current result with another. This allows ownership to be
419  // efficiently transferred without copying.
420  void Swap(QueryResults* other);
421
422  // Adds the given result to the map, using swap() on the members to avoid
423  // copying (there are a lot of strings and vectors). This means the parameter
424  // object will be cleared after this call.
425  void AppendURLBySwapping(URLResult* result);
426
427  // Appends a new result set to the other. The |other| results will be
428  // destroyed because the pointer ownership will just be transferred. When
429  // |remove_dupes| is set, each URL that appears in this array will be removed
430  // from the |other| array before appending.
431  void AppendResultsBySwapping(QueryResults* other, bool remove_dupes);
432
433  // Removes all instances of the given URL from the result set.
434  void DeleteURL(const GURL& url);
435
436  // Deletes the given range of items in the result set.
437  void DeleteRange(size_t begin, size_t end);
438
439 private:
440  // Maps the given URL to a list of indices into results_ which identify each
441  // time an entry with that URL appears. Normally, each URL will have one or
442  // very few indices after it, so we optimize this to use statically allocated
443  // memory when possible.
444  typedef std::map<GURL, StackVector<size_t, 4> > URLToResultIndices;
445
446  // Inserts an entry into the |url_to_results_| map saying that the given URL
447  // is at the given index in the results_.
448  void AddURLUsageAtIndex(const GURL& url, size_t index);
449
450  // Adds |delta| to each index in url_to_results_ in the range [begin,end]
451  // (this is inclusive). This is used when inserting or deleting.
452  void AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta);
453
454  base::Time first_time_searched_;
455
456  // Whether the query reaches the beginning of the database.
457  bool reached_beginning_;
458
459  // The ordered list of results. The pointers inside this are owned by this
460  // QueryResults object.
461  URLResultVector results_;
462
463  // Maps URLs to entries in results_.
464  URLToResultIndices url_to_results_;
465
466  DISALLOW_COPY_AND_ASSIGN(QueryResults);
467};
468
469// QueryOptions ----------------------------------------------------------------
470
471struct QueryOptions {
472  QueryOptions() : max_count(0) {}
473
474  // The time range to search for matches in.
475  //
476  // This will match only the one recent visit of a URL.  For text search
477  // queries, if the URL was visited in the given time period, but has also been
478  // visited more recently than that, it will not be returned. When the text
479  // query is empty, this will return the most recent visit within the time
480  // range.
481  //
482  // As a special case, if both times are is_null(), then the entire database
483  // will be searched. However, if you set one, you must set the other.
484  //
485  // The beginning is inclusive and the ending is exclusive.
486  base::Time begin_time;
487  base::Time end_time;
488
489  // Sets the query time to the last |days_ago| days to the present time.
490  void SetRecentDayRange(int days_ago) {
491    end_time = base::Time::Now();
492    begin_time = end_time - base::TimeDelta::FromDays(days_ago);
493  }
494
495  // The maximum number of results to return. The results will be sorted with
496  // the most recent first, so older results may not be returned if there is not
497  // enough room. When 0, this will return everything (the default).
498  int max_count;
499};
500
501// KeywordSearchTermVisit -----------------------------------------------------
502
503// KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It
504// gives the time and search term of the keyword visit.
505struct KeywordSearchTermVisit {
506  // The time of the visit.
507  base::Time time;
508
509  // The search term that was used.
510  string16 term;
511};
512
513// MostVisitedURL --------------------------------------------------------------
514
515// Holds the per-URL information of the most visited query.
516struct MostVisitedURL {
517  GURL url;
518  GURL favicon_url;
519  string16 title;
520
521  RedirectList redirects;
522
523  bool operator==(const MostVisitedURL& other) {
524    return url == other.url;
525  }
526};
527
528typedef std::vector<MostVisitedURL> MostVisitedURLList;
529
530}  // history
531
532#endif  // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
533