1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
6#define CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
7#pragma once
8
9#include <deque>
10#include <map>
11#include <set>
12#include <string>
13#include <vector>
14
15#include "base/basictypes.h"
16#include "base/memory/ref_counted_memory.h"
17#include "base/stack_container.h"
18#include "base/string16.h"
19#include "base/time.h"
20#include "chrome/browser/history/snippet.h"
21#include "chrome/browser/search_engines/template_url_id.h"
22#include "chrome/common/ref_counted_util.h"
23#include "chrome/common/thumbnail_score.h"
24#include "content/common/page_transition_types.h"
25#include "googleurl/src/gurl.h"
26
27namespace history {
28
// Forward declarations of the classes named in friend statements below.
class HistoryBackend;
class URLDatabase;
32
33// Structure to hold redirect lists for URLs.  For a redirect chain
34// A -> B -> C, and entry in the map would look like "A => {B -> C}".
35typedef std::map<GURL, scoped_refptr<RefCountedVector<GURL> > > RedirectMap;
36
37// Container for a list of URLs.
38typedef std::vector<GURL> RedirectList;
39
40typedef int64 StarID;  // Unique identifier for star entries.
41typedef int64 UIStarID;  // Identifier for star entries that come from the UI.
42typedef int64 DownloadID;   // Identifier for a download.
43typedef int64 FaviconID;  // For favicons.
44typedef int64 SegmentID;  // URL segments for the most visited view.
45typedef int64 IconMappingID; // For page url and icon mapping.
46
47// URLRow ---------------------------------------------------------------------
48
49typedef int64 URLID;
50
51// Holds all information globally associated with one URL (one row in the
52// URL table).
53//
54// This keeps track of dirty bits, which are currently unused:
55//
56// TODO(brettw) the dirty bits are broken in a number of respects. First, the
57// database will want to update them on a const object, so they need to be
58// mutable.
59//
60// Second, there is a problem copying. If you make a copy of this structure
61// (as we allow since we put this into vectors in various places) then the
62// dirty bits will not be in sync for these copies.
63class URLRow {
64 public:
65  URLRow();
66
67  explicit URLRow(const GURL& url);
68
69  // We need to be able to set the id of a URLRow that's being passed through
70  // an IPC message.  This constructor should probably not be used otherwise.
71  URLRow(const GURL& url, URLID id);
72
73  virtual ~URLRow();
74  URLRow& operator=(const URLRow& other);
75
76  URLID id() const { return id_; }
77  const GURL& url() const { return url_; }
78
79  const string16& title() const {
80    return title_;
81  }
82  void set_title(const string16& title) {
83    // The title is frequently set to the same thing, so we don't bother
84    // updating unless the string has changed.
85    if (title != title_) {
86      title_ = title;
87    }
88  }
89
90  int visit_count() const {
91    return visit_count_;
92  }
93  void set_visit_count(int visit_count) {
94    visit_count_ = visit_count;
95  }
96
97  // Number of times the URL was typed in the Omnibox.
98  int typed_count() const {
99    return typed_count_;
100  }
101  void set_typed_count(int typed_count) {
102    typed_count_ = typed_count;
103  }
104
105  base::Time last_visit() const {
106    return last_visit_;
107  }
108  void set_last_visit(base::Time last_visit) {
109    last_visit_ = last_visit;
110  }
111
112  // If this is set, we won't autocomplete this URL.
113  bool hidden() const {
114    return hidden_;
115  }
116  void set_hidden(bool hidden) {
117    hidden_ = hidden;
118  }
119
120 protected:
121  // Swaps the contents of this URLRow with another, which allows it to be
122  // destructively copied without memory allocations.
123  void Swap(URLRow* other);
124
125 private:
126  // This class writes directly into this structure and clears our dirty bits
127  // when reading out of the DB.
128  friend class URLDatabase;
129  friend class HistoryBackend;
130
131  // Initializes all values that need initialization to their defaults.
132  // This excludes objects which autoinitialize such as strings.
133  void Initialize();
134
135  // The row ID of this URL. Immutable except for the database which sets it
136  // when it pulls them out.
137  URLID id_;
138
139  // The URL of this row. Immutable except for the database which sets it
140  // when it pulls them out. If clients want to change it, they must use
141  // the constructor to make a new one.
142  GURL url_;
143
144  string16 title_;
145
146  // Total number of times this URL has been visited.
147  int visit_count_;
148
149  // Number of times this URL has been manually entered in the URL bar.
150  int typed_count_;
151
152  // The date of the last visit of this URL, which saves us from having to
153  // loop up in the visit table for things like autocomplete and expiration.
154  base::Time last_visit_;
155
156  // Indicates this entry should now be shown in typical UI or queries, this
157  // is usually for subframes.
158  bool hidden_;
159
160  // We support the implicit copy constuctor and operator=.
161};
162
// Enumerates all possible sources of visits. The source is propagated along
// with a URL or a visit item and is eventually stored in the history
// database, specifically in the visit_source table.
// Unlike page transition types, these values describe the origins of visits.
// (Warning): Please don't change any existing values; it is ok to add new
// values when needed.
enum VisitSource {
  // Synchronized from somewhere else.
  SOURCE_SYNCED = 0,
  // User browsed.
  SOURCE_BROWSED = 1,
  // Added by an extension.
  SOURCE_EXTENSION = 2,
  SOURCE_FIREFOX_IMPORTED = 3,
  SOURCE_IE_IMPORTED = 4,
  SOURCE_SAFARI_IMPORTED = 5
};
178
179typedef int64 VisitID;
180// Structure to hold the mapping between each visit's id and its source.
181typedef std::map<VisitID, VisitSource> VisitSourceMap;
182
183// VisitRow -------------------------------------------------------------------
184
185// Holds all information associated with a specific visit. A visit holds time
186// and referrer information for one time a URL is visited.
187class VisitRow {
188 public:
189  VisitRow();
190  VisitRow(URLID arg_url_id,
191           base::Time arg_visit_time,
192           VisitID arg_referring_visit,
193           PageTransition::Type arg_transition,
194           SegmentID arg_segment_id);
195  ~VisitRow();
196
197  // ID of this row (visit ID, used a a referrer for other visits).
198  VisitID visit_id;
199
200  // Row ID into the URL table of the URL that this page is.
201  URLID url_id;
202
203  base::Time visit_time;
204
205  // Indicates another visit that was the referring page for this one.
206  // 0 indicates no referrer.
207  VisitID referring_visit;
208
209  // A combination of bits from PageTransition.
210  PageTransition::Type transition;
211
212  // The segment id (see visitsegment_database.*).
213  // If 0, the segment id is null in the table.
214  SegmentID segment_id;
215
216  // True when this visit has indexed data for it. We try to keep this in sync
217  // with the full text index: when we add or remove things from there, we will
218  // update the visit table as well. However, that file could get deleted, or
219  // out of sync in various ways, so this flag should be false when things
220  // change.
221  bool is_indexed;
222
223  // Compares two visits based on dates, for sorting.
224  bool operator<(const VisitRow& other) {
225    return visit_time < other.visit_time;
226  }
227
228  // We allow the implicit copy constuctor and operator=.
229};
230
231// We pass around vectors of visits a lot
232typedef std::vector<VisitRow> VisitVector;
233
234// Favicons -------------------------------------------------------------------
235
236// Used by the importer to set favicons for imported bookmarks.
237struct ImportedFaviconUsage {
238  ImportedFaviconUsage();
239  ~ImportedFaviconUsage();
240
241  // The URL of the favicon.
242  GURL favicon_url;
243
244  // The raw png-encoded data.
245  std::vector<unsigned char> png_data;
246
247  // The list of URLs using this favicon.
248  std::set<GURL> urls;
249};
250
251// PageVisit ------------------------------------------------------------------
252
253// Represents a simplified version of a visit for external users. Normally,
254// views are only interested in the time, and not the other information
255// associated with a VisitRow.
256struct PageVisit {
257  URLID page_id;
258  base::Time visit_time;
259};
260
261// StarredEntry ---------------------------------------------------------------
262
263// StarredEntry represents either a starred page, or a folder (where a folder
264// consists of child starred entries). Use the type to determine the type of a
265// particular entry.
266//
267// The database internally uses the id field to uniquely identify a starred
268// entry. On the other hand, the UI, which is anything routed through
269// HistoryService and HistoryBackend (including BookmarkBarView), uses the
270// url field to uniquely identify starred entries of type URL and the folder_id
271// field to uniquely identify starred entries of type USER_FOLDER. For example,
272// HistoryService::UpdateStarredEntry identifies the entry by url (if the
273// type is URL) or folder_id (if the type is not URL).
274struct StarredEntry {
275  enum Type {
276    // Type represents a starred URL.
277    URL,
278
279    // The bookmark bar folder.
280    BOOKMARK_BAR,
281
282    // User created folder.
283    USER_FOLDER,
284
285    // The "other bookmarks" folder that holds uncategorized bookmarks.
286    OTHER
287  };
288
289  StarredEntry();
290  ~StarredEntry();
291
292  void Swap(StarredEntry* other);
293
294  // Unique identifier of this entry.
295  StarID id;
296
297  // Title.
298  string16 title;
299
300  // When this was added.
301  base::Time date_added;
302
303  // Folder ID of the folder this entry is in. If 0, this entry is not in a
304  // folder.
305  UIStarID parent_folder_id;
306
307  // Unique identifier for folders. This is assigned by the UI.
308  //
309  // WARNING: this is NOT the same as id, id is assigned by the database,
310  // this is assigned by the UI. See note about StarredEntry for more info.
311  UIStarID folder_id;
312
313  // Visual order within the parent. Only valid if folder_id is not 0.
314  int visual_order;
315
316  // Type of this entry (see enum).
317  Type type;
318
319  // If type == URL, this is the URL of the page that was starred.
320  GURL url;
321
322  // If type == URL, this is the ID of the URL of the primary page that was
323  // starred.
324  URLID url_id;
325
326  // Time the entry was last modified. This is only used for folders and
327  // indicates the last time a URL was added as a child to the folder.
328  base::Time date_folder_modified;
329};
330
331// URLResult -------------------------------------------------------------------
332
333class URLResult : public URLRow {
334 public:
335  URLResult();
336  URLResult(const GURL& url, base::Time visit_time);
337  // Constructor that create a URLResult from the specified URL and title match
338  // positions from title_matches.
339  URLResult(const GURL& url, const Snippet::MatchPositions& title_matches);
340  ~URLResult();
341
342  base::Time visit_time() const { return visit_time_; }
343  void set_visit_time(base::Time visit_time) { visit_time_ = visit_time; }
344
345  const Snippet& snippet() const { return snippet_; }
346
347  // If this is a title match, title_match_positions contains an entry for
348  // every word in the title that matched one of the query parameters. Each
349  // entry contains the start and end of the match.
350  const Snippet::MatchPositions& title_match_positions() const {
351    return title_match_positions_;
352  }
353
354  void SwapResult(URLResult* other);
355
356 private:
357  friend class HistoryBackend;
358
359  // The time that this result corresponds to.
360  base::Time visit_time_;
361
362  // These values are typically set by HistoryBackend.
363  Snippet snippet_;
364  Snippet::MatchPositions title_match_positions_;
365
366  // We support the implicit copy constructor and operator=.
367};
368
369// QueryResults ----------------------------------------------------------------
370
371// Encapsulates the results of a history query. It supports an ordered list of
372// URLResult objects, plus an efficient way of looking up the index of each time
373// a given URL appears in those results.
374class QueryResults {
375 public:
376  typedef std::vector<URLResult*> URLResultVector;
377
378  QueryResults();
379  ~QueryResults();
380
381  // Indicates the first time that the query includes results for (queries are
382  // clipped at the beginning, so it will always include to the end of the time
383  // queried).
384  //
385  // If the number of results was clipped as a result of the max count, this
386  // will be the time of the first query returned. If there were fewer results
387  // than we were allowed to return, this represents the first date considered
388  // in the query (this will be before the first result if there was time
389  // queried with no results).
390  //
391  // TODO(brettw): bug 1203054: This field is not currently set properly! Do
392  // not use until the bug is fixed.
393  base::Time first_time_searched() const { return first_time_searched_; }
394  void set_first_time_searched(base::Time t) { first_time_searched_ = t; }
395  // Note: If you need end_time_searched, it can be added.
396
397  void set_reached_beginning(bool reached) { reached_beginning_ = reached; }
398  bool reached_beginning() { return reached_beginning_; }
399
400  size_t size() const { return results_.size(); }
401  bool empty() const { return results_.empty(); }
402
403  URLResult& operator[](size_t i) { return *results_[i]; }
404  const URLResult& operator[](size_t i) const { return *results_[i]; }
405
406  URLResultVector::const_iterator begin() const { return results_.begin(); }
407  URLResultVector::const_iterator end() const { return results_.end(); }
408  URLResultVector::const_reverse_iterator rbegin() const {
409    return results_.rbegin();
410  }
411  URLResultVector::const_reverse_iterator rend() const {
412    return results_.rend();
413  }
414
415  // Returns a pointer to the beginning of an array of all matching indices
416  // for entries with the given URL. The array will be |*num_matches| long.
417  // |num_matches| can be NULL if the caller is not interested in the number of
418  // results (commonly it will only be interested in the first one and can test
419  // the pointer for NULL).
420  //
421  // When there is no match, it will return NULL and |*num_matches| will be 0.
422  const size_t* MatchesForURL(const GURL& url, size_t* num_matches) const;
423
424  // Swaps the current result with another. This allows ownership to be
425  // efficiently transferred without copying.
426  void Swap(QueryResults* other);
427
428  // Adds the given result to the map, using swap() on the members to avoid
429  // copying (there are a lot of strings and vectors). This means the parameter
430  // object will be cleared after this call.
431  void AppendURLBySwapping(URLResult* result);
432
433  // Appends a new result set to the other. The |other| results will be
434  // destroyed because the pointer ownership will just be transferred. When
435  // |remove_dupes| is set, each URL that appears in this array will be removed
436  // from the |other| array before appending.
437  void AppendResultsBySwapping(QueryResults* other, bool remove_dupes);
438
439  // Removes all instances of the given URL from the result set.
440  void DeleteURL(const GURL& url);
441
442  // Deletes the given range of items in the result set.
443  void DeleteRange(size_t begin, size_t end);
444
445 private:
446  // Maps the given URL to a list of indices into results_ which identify each
447  // time an entry with that URL appears. Normally, each URL will have one or
448  // very few indices after it, so we optimize this to use statically allocated
449  // memory when possible.
450  typedef std::map<GURL, StackVector<size_t, 4> > URLToResultIndices;
451
452  // Inserts an entry into the |url_to_results_| map saying that the given URL
453  // is at the given index in the results_.
454  void AddURLUsageAtIndex(const GURL& url, size_t index);
455
456  // Adds |delta| to each index in url_to_results_ in the range [begin,end]
457  // (this is inclusive). This is used when inserting or deleting.
458  void AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta);
459
460  base::Time first_time_searched_;
461
462  // Whether the query reaches the beginning of the database.
463  bool reached_beginning_;
464
465  // The ordered list of results. The pointers inside this are owned by this
466  // QueryResults object.
467  URLResultVector results_;
468
469  // Maps URLs to entries in results_.
470  URLToResultIndices url_to_results_;
471
472  DISALLOW_COPY_AND_ASSIGN(QueryResults);
473};
474
475// QueryOptions ----------------------------------------------------------------
476
477struct QueryOptions {
478  QueryOptions();
479
480  // The time range to search for matches in.
481  //
482  // This will match only the one recent visit of a URL.  For text search
483  // queries, if the URL was visited in the given time period, but has also been
484  // visited more recently than that, it will not be returned. When the text
485  // query is empty, this will return the most recent visit within the time
486  // range.
487  //
488  // As a special case, if both times are is_null(), then the entire database
489  // will be searched. However, if you set one, you must set the other.
490  //
491  // The beginning is inclusive and the ending is exclusive.
492  base::Time begin_time;
493  base::Time end_time;
494
495  // Sets the query time to the last |days_ago| days to the present time.
496  void SetRecentDayRange(int days_ago);
497
498  // The maximum number of results to return. The results will be sorted with
499  // the most recent first, so older results may not be returned if there is not
500  // enough room. When 0, this will return everything (the default).
501  int max_count;
502};
503
504// KeywordSearchTermVisit -----------------------------------------------------
505
506// KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It
507// gives the time and search term of the keyword visit.
508struct KeywordSearchTermVisit {
509  KeywordSearchTermVisit();
510  ~KeywordSearchTermVisit();
511
512  // The time of the visit.
513  base::Time time;
514
515  // The search term that was used.
516  string16 term;
517};
518
519// KeywordSearchTermRow --------------------------------------------------------
520
521// Used for URLs that have a search term associated with them.
522struct KeywordSearchTermRow {
523  KeywordSearchTermRow();
524  ~KeywordSearchTermRow();
525
526  // ID of the keyword.
527  TemplateURLID keyword_id;
528
529  // ID of the url.
530  URLID url_id;
531
532  // The search term that was used.
533  string16 term;
534};
535
536// MostVisitedURL --------------------------------------------------------------
537
538// Holds the per-URL information of the most visited query.
539struct MostVisitedURL {
540  MostVisitedURL();
541  MostVisitedURL(const GURL& in_url,
542                 const GURL& in_favicon_url,
543                 const string16& in_title);
544  ~MostVisitedURL();
545
546  GURL url;
547  GURL favicon_url;
548  string16 title;
549
550  RedirectList redirects;
551
552  bool operator==(const MostVisitedURL& other) {
553    return url == other.url;
554  }
555};
556
557// Navigation -----------------------------------------------------------------
558
559// Marshalling structure for AddPage.
560class HistoryAddPageArgs
561    : public base::RefCountedThreadSafe<HistoryAddPageArgs> {
562 public:
563  HistoryAddPageArgs(const GURL& arg_url,
564                     base::Time arg_time,
565                     const void* arg_id_scope,
566                     int32 arg_page_id,
567                     const GURL& arg_referrer,
568                     const history::RedirectList& arg_redirects,
569                     PageTransition::Type arg_transition,
570                     VisitSource arg_source,
571                     bool arg_did_replace_entry);
572
573  // Returns a new HistoryAddPageArgs that is a copy of this (ref count is
574  // of course reset). Ownership of returned object passes to caller.
575  HistoryAddPageArgs* Clone() const;
576
577  GURL url;
578  base::Time time;
579
580  const void* id_scope;
581  int32 page_id;
582
583  GURL referrer;
584  history::RedirectList redirects;
585  PageTransition::Type transition;
586  VisitSource visit_source;
587  bool did_replace_entry;
588
589 private:
590  friend class base::RefCountedThreadSafe<HistoryAddPageArgs>;
591
592  ~HistoryAddPageArgs();
593
594  DISALLOW_COPY_AND_ASSIGN(HistoryAddPageArgs);
595};
596
597// TopSites -------------------------------------------------------------------
598
599typedef std::vector<MostVisitedURL> MostVisitedURLList;
600
601// Used by TopSites to store the thumbnails.
602struct Images {
603  Images();
604  ~Images();
605
606  scoped_refptr<RefCountedBytes> thumbnail;
607  ThumbnailScore thumbnail_score;
608
609  // TODO(brettw): this will eventually store the favicon.
610  // scoped_refptr<RefCountedBytes> favicon;
611};
612
613typedef std::vector<MostVisitedURL> MostVisitedURLList;
614
615struct MostVisitedURLWithRank {
616  MostVisitedURL url;
617  int rank;
618};
619
620typedef std::vector<MostVisitedURLWithRank> MostVisitedURLWithRankList;
621
622struct TopSitesDelta {
623  TopSitesDelta();
624  ~TopSitesDelta();
625
626  MostVisitedURLList deleted;
627  MostVisitedURLWithRankList added;
628  MostVisitedURLWithRankList moved;
629};
630
631typedef std::map<GURL, scoped_refptr<RefCountedBytes> > URLToThumbnailMap;
632
633// Used when migrating most visited thumbnails out of history and into topsites.
634struct ThumbnailMigration {
635  ThumbnailMigration();
636  ~ThumbnailMigration();
637
638  MostVisitedURLList most_visited;
639  URLToThumbnailMap url_to_thumbnail_map;
640};
641
642typedef std::map<GURL, Images> URLToImagesMap;
643
644class MostVisitedThumbnails
645    : public base::RefCountedThreadSafe<MostVisitedThumbnails> {
646 public:
647  MostVisitedThumbnails();
648  virtual ~MostVisitedThumbnails();
649
650  MostVisitedURLList most_visited;
651  URLToImagesMap url_to_images_map;
652
653 private:
654  friend class base::RefCountedThreadSafe<MostVisitedThumbnails>;
655
656  DISALLOW_COPY_AND_ASSIGN(MostVisitedThumbnails);
657};
658
659// Autocomplete thresholds -----------------------------------------------------
660
661// Constants which specify, when considered altogether, 'significant'
662// history items. These are used to filter out insignificant items
663// for consideration as autocomplete candidates.
664extern const int kLowQualityMatchTypedLimit;
665extern const int kLowQualityMatchVisitLimit;
666extern const int kLowQualityMatchAgeLimitInDays;
667
668// Returns the date threshold for considering an history item as significant.
669base::Time AutocompleteAgeThreshold();
670
671// Return true if |row| qualifies as an autocomplete candidate. If |time_cache|
672// is_null() then this function determines a new time threshold each time it is
673// called. Since getting system time can be costly (such as for cases where
674// this function will be called in a loop over many history items), you can
675// provide a non-null |time_cache| by simply initializing |time_cache| with
676// AutocompleteAgeThreshold() (or any other desired time in the past).
677bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold);
678
// Defines the icon types. The values are also stored in the icon_type field
// of the favicons table. Each valid type occupies its own bit.
enum IconType {
  INVALID_ICON = 0x0,
  FAVICON = 1 << 0,
  TOUCH_ICON = 1 << 1,
  TOUCH_PRECOMPOSED_ICON = 1 << 2
};
687
688// Used for the mapping between the page and icon.
689struct IconMapping {
690  IconMapping();
691  ~IconMapping();
692
693  // The unique id of the mapping.
694  IconMappingID mapping_id;
695
696  // The url of a web page.
697  GURL page_url;
698
699  // The unique id of the icon.
700  FaviconID icon_id;
701
702  // The type of icon.
703  IconType icon_type;
704};
705
706// Defines the favicon stored in history backend.
707struct FaviconData {
708  FaviconData();
709  ~FaviconData();
710
711  // Returns true if the icon is known and image has data.
712  bool is_valid();
713
714  // Indicates whether the icon is known by the history backend.
715  bool known_icon;
716
717  // The bits of image.
718  scoped_refptr<RefCountedMemory> image_data;
719
720  // Indicates whether image is expired.
721  bool expired;
722
723  // The icon's URL.
724  GURL icon_url;
725
726  // The type of favicon.
727  history::IconType icon_type;
728};
729
730}  // namespace history
731
732#endif  // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
733