1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/expire_history_backend.h"
6
7#include <algorithm>
8#include <functional>
9#include <limits>
10
11#include "base/bind.h"
12#include "base/compiler_specific.h"
13#include "base/files/file_enumerator.h"
14#include "base/files/file_util.h"
15#include "base/logging.h"
16#include "base/message_loop/message_loop.h"
17#include "chrome/browser/chrome_notification_types.h"
18#include "chrome/browser/history/history_database.h"
19#include "chrome/browser/history/history_notifications.h"
20#include "chrome/browser/history/thumbnail_database.h"
21#include "components/history/core/browser/history_client.h"
22
23namespace history {
24
25// Helpers --------------------------------------------------------------------
26
27namespace {
28
29// The number of days by which the expiration threshold is advanced for items
30// that we want to expire early, such as those of AUTO_SUBFRAME transition type.
31//
32// Early expiration stuff is kept around only for edge cases, as subframes
33// don't appear in history and the vast majority of them are ads anyway. The
34// main use case for these is if you're on a site with links to different
35// frames, you'll be able to see those links as visited, and we'll also be
36// able to get redirect information for those URLs.
37//
38// But since these uses are most valuable when you're actually on the site,
39// and because these can take up the bulk of your history, we get a lot of
40// space savings by deleting them quickly.
41const int kEarlyExpirationAdvanceDays = 3;
42
43// Reads all types of visits starting from beginning of time to the given end
44// time. This is the most general reader.
45class AllVisitsReader : public ExpiringVisitsReader {
46 public:
47  virtual bool Read(base::Time end_time,
48                    HistoryDatabase* db,
49                    VisitVector* visits,
50                    int max_visits) const OVERRIDE {
51    DCHECK(db) << "must have a database to operate upon";
52    DCHECK(visits) << "visit vector has to exist in order to populate it";
53
54    db->GetAllVisitsInRange(base::Time(), end_time, max_visits, visits);
55    // When we got the maximum number of visits we asked for, we say there could
56    // be additional things to expire now.
57    return static_cast<int>(visits->size()) == max_visits;
58  }
59};
60
61// Reads only AUTO_SUBFRAME visits, within a computed range. The range is
62// computed as follows:
63// * |begin_time| is read from the meta table. This value is updated whenever
64//   there are no more additional visits to expire by this reader.
65// * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay),
66//   but not past the current time.
67class AutoSubframeVisitsReader : public ExpiringVisitsReader {
68 public:
69  virtual bool Read(base::Time end_time,
70                    HistoryDatabase* db,
71                    VisitVector* visits,
72                    int max_visits) const OVERRIDE {
73    DCHECK(db) << "must have a database to operate upon";
74    DCHECK(visits) << "visit vector has to exist in order to populate it";
75
76    base::Time begin_time = db->GetEarlyExpirationThreshold();
77    // Advance |end_time| to expire early.
78    base::Time early_end_time = end_time +
79        base::TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
80
81    // We don't want to set the early expiration threshold to a time in the
82    // future.
83    base::Time now = base::Time::Now();
84    if (early_end_time > now)
85      early_end_time = now;
86
87    db->GetVisitsInRangeForTransition(begin_time, early_end_time,
88                                      max_visits,
89                                      ui::PAGE_TRANSITION_AUTO_SUBFRAME,
90                                      visits);
91    bool more = static_cast<int>(visits->size()) == max_visits;
92    if (!more)
93      db->UpdateEarlyExpirationThreshold(early_end_time);
94
95    return more;
96  }
97};
98
99// The number of visits we will expire very time we check for old items. This
100// Prevents us from doing too much work any given time.
101const int kNumExpirePerIteration = 32;
102
103// The number of seconds between checking for items that should be expired when
104// we think there might be more items to expire. This timeout is used when the
105// last expiration found at least kNumExpirePerIteration and we want to check
106// again "soon."
107const int kExpirationDelaySec = 30;
108
109// The number of minutes between checking, as with kExpirationDelaySec, but
110// when we didn't find enough things to expire last time. If there was no
111// history to expire last iteration, it's likely there is nothing next
112// iteration, so we want to wait longer before checking to avoid wasting CPU.
113const int kExpirationEmptyDelayMin = 5;
114
115}  // namespace
116
117
118// ExpireHistoryBackend::DeleteEffects ----------------------------------------
119
120ExpireHistoryBackend::DeleteEffects::DeleteEffects() {
121}
122
123ExpireHistoryBackend::DeleteEffects::~DeleteEffects() {
124}
125
126
127// ExpireHistoryBackend -------------------------------------------------------
128
129ExpireHistoryBackend::ExpireHistoryBackend(
130    BroadcastNotificationDelegate* delegate,
131    HistoryClient* history_client)
132    : delegate_(delegate),
133      main_db_(NULL),
134      thumb_db_(NULL),
135      history_client_(history_client),
136      weak_factory_(this) {
137}
138
139ExpireHistoryBackend::~ExpireHistoryBackend() {
140}
141
142void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
143                                        ThumbnailDatabase* thumb_db) {
144  main_db_ = main_db;
145  thumb_db_ = thumb_db;
146}
147
148void ExpireHistoryBackend::DeleteURL(const GURL& url) {
149  DeleteURLs(std::vector<GURL>(1, url));
150}
151
152void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) {
153  if (!main_db_)
154    return;
155
156  DeleteEffects effects;
157  HistoryClient* history_client = GetHistoryClient();
158  for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end();
159       ++url) {
160    URLRow url_row;
161    if (!main_db_->GetRowForURL(*url, &url_row))
162      continue;  // Nothing to delete.
163
164    // Collect all the visits and delete them. Note that we don't give up if
165    // there are no visits, since the URL could still have an entry that we
166    // should delete.
167    VisitVector visits;
168    main_db_->GetVisitsForURL(url_row.id(), &visits);
169
170    DeleteVisitRelatedInfo(visits, &effects);
171
172    // We skip ExpireURLsForVisits (since we are deleting from the
173    // URL, and not starting with visits in a given time range). We
174    // therefore need to call the deletion and favicon update
175    // functions manually.
176    DeleteOneURL(url_row,
177                 history_client && history_client->IsBookmarked(*url),
178                 &effects);
179  }
180
181  DeleteFaviconsIfPossible(&effects);
182
183  BroadcastNotifications(&effects, DELETION_USER_INITIATED);
184}
185
186void ExpireHistoryBackend::ExpireHistoryBetween(
187    const std::set<GURL>& restrict_urls,
188    base::Time begin_time,
189    base::Time end_time) {
190  if (!main_db_)
191    return;
192
193  // Find the affected visits and delete them.
194  VisitVector visits;
195  main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
196  if (!restrict_urls.empty()) {
197    std::set<URLID> url_ids;
198    for (std::set<GURL>::const_iterator url = restrict_urls.begin();
199        url != restrict_urls.end(); ++url)
200      url_ids.insert(main_db_->GetRowForURL(*url, NULL));
201    VisitVector all_visits;
202    all_visits.swap(visits);
203    for (VisitVector::iterator visit = all_visits.begin();
204         visit != all_visits.end(); ++visit) {
205      if (url_ids.find(visit->url_id) != url_ids.end())
206        visits.push_back(*visit);
207    }
208  }
209  ExpireVisits(visits);
210}
211
212void ExpireHistoryBackend::ExpireHistoryForTimes(
213    const std::vector<base::Time>& times) {
214  // |times| must be in reverse chronological order and have no
215  // duplicates, i.e. each member must be earlier than the one before
216  // it.
217  DCHECK(
218      std::adjacent_find(
219          times.begin(), times.end(), std::less_equal<base::Time>()) ==
220      times.end());
221
222  if (!main_db_)
223    return;
224
225  // Find the affected visits and delete them.
226  VisitVector visits;
227  main_db_->GetVisitsForTimes(times, &visits);
228  ExpireVisits(visits);
229}
230
231void ExpireHistoryBackend::ExpireVisits(const VisitVector& visits) {
232  if (visits.empty())
233    return;
234
235  DeleteEffects effects;
236  DeleteVisitRelatedInfo(visits, &effects);
237
238  // Delete or update the URLs affected. We want to update the visit counts
239  // since this is called by the user who wants to delete their recent history,
240  // and we don't want to leave any evidence.
241  ExpireURLsForVisits(visits, &effects);
242  DeleteFaviconsIfPossible(&effects);
243  BroadcastNotifications(&effects, DELETION_USER_INITIATED);
244
245  // Pick up any bits possibly left over.
246  ParanoidExpireHistory();
247}
248
249void ExpireHistoryBackend::ExpireHistoryBefore(base::Time end_time) {
250  if (!main_db_)
251    return;
252
253  // Expire as much history as possible before the given date.
254  ExpireSomeOldHistory(end_time, GetAllVisitsReader(),
255                       std::numeric_limits<int>::max());
256  ParanoidExpireHistory();
257}
258
259void ExpireHistoryBackend::InitWorkQueue() {
260  DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
261
262  for (size_t i = 0; i < readers_.size(); i++)
263    work_queue_.push(readers_[i]);
264}
265
266const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
267  if (!all_visits_reader_)
268    all_visits_reader_.reset(new AllVisitsReader());
269  return all_visits_reader_.get();
270}
271
272const ExpiringVisitsReader*
273    ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
274  if (!auto_subframe_visits_reader_)
275    auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
276  return auto_subframe_visits_reader_.get();
277}
278
279void ExpireHistoryBackend::StartExpiringOldStuff(
280    base::TimeDelta expiration_threshold) {
281  expiration_threshold_ = expiration_threshold;
282
283  // Remove all readers, just in case this was method was called before.
284  readers_.clear();
285  // For now, we explicitly add all known readers. If we come up with more
286  // reader types (in case we want to expire different types of visits in
287  // different ways), we can make it be populated by creator/owner of
288  // ExpireHistoryBackend.
289  readers_.push_back(GetAllVisitsReader());
290  readers_.push_back(GetAutoSubframeVisitsReader());
291
292  // Initialize the queue with all tasks for the first set of iterations.
293  InitWorkQueue();
294  ScheduleExpire();
295}
296
297void ExpireHistoryBackend::DeleteFaviconsIfPossible(DeleteEffects* effects) {
298  if (!thumb_db_)
299    return;
300
301  for (std::set<favicon_base::FaviconID>::const_iterator i =
302           effects->affected_favicons.begin();
303       i != effects->affected_favicons.end(); ++i) {
304    if (!thumb_db_->HasMappingFor(*i)) {
305      GURL icon_url;
306      favicon_base::IconType icon_type;
307      if (thumb_db_->GetFaviconHeader(*i,
308                                      &icon_url,
309                                      &icon_type) &&
310          thumb_db_->DeleteFavicon(*i)) {
311        effects->deleted_favicons.insert(icon_url);
312      }
313    }
314  }
315}
316
317void ExpireHistoryBackend::BroadcastNotifications(DeleteEffects* effects,
318                                                  DeletionType type) {
319  if (!effects->modified_urls.empty()) {
320    scoped_ptr<URLsModifiedDetails> details(new URLsModifiedDetails);
321    details->changed_urls = effects->modified_urls;
322    delegate_->NotifySyncURLsModified(&details->changed_urls);
323    delegate_->BroadcastNotifications(
324        chrome::NOTIFICATION_HISTORY_URLS_MODIFIED,
325        details.PassAs<HistoryDetails>());
326  }
327  if (!effects->deleted_urls.empty()) {
328    scoped_ptr<URLsDeletedDetails> details(new URLsDeletedDetails);
329    details->all_history = false;
330    details->expired = (type == DELETION_EXPIRED);
331    details->rows = effects->deleted_urls;
332    details->favicon_urls = effects->deleted_favicons;
333    delegate_->NotifySyncURLsDeleted(details->all_history, details->expired,
334                                     &details->rows);
335    delegate_->BroadcastNotifications(chrome::NOTIFICATION_HISTORY_URLS_DELETED,
336                                      details.PassAs<HistoryDetails>());
337  }
338}
339
340void ExpireHistoryBackend::DeleteVisitRelatedInfo(const VisitVector& visits,
341                                                  DeleteEffects* effects) {
342  for (size_t i = 0; i < visits.size(); i++) {
343    // Delete the visit itself.
344    main_db_->DeleteVisit(visits[i]);
345
346    // Add the URL row to the affected URL list.
347    if (!effects->affected_urls.count(visits[i].url_id)) {
348      URLRow row;
349      if (main_db_->GetURLRow(visits[i].url_id, &row))
350        effects->affected_urls[visits[i].url_id] = row;
351    }
352  }
353}
354
355void ExpireHistoryBackend::DeleteOneURL(const URLRow& url_row,
356                                        bool is_bookmarked,
357                                        DeleteEffects* effects) {
358  main_db_->DeleteSegmentForURL(url_row.id());
359
360  if (!is_bookmarked) {
361    effects->deleted_urls.push_back(url_row);
362
363    // Delete stuff that references this URL.
364    if (thumb_db_) {
365      // Collect shared information.
366      std::vector<IconMapping> icon_mappings;
367      if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) {
368        for (std::vector<IconMapping>::iterator m = icon_mappings.begin();
369             m != icon_mappings.end(); ++m) {
370          effects->affected_favicons.insert(m->icon_id);
371        }
372        // Delete the mapping entries for the url.
373        thumb_db_->DeleteIconMappings(url_row.url());
374      }
375    }
376    // Last, delete the URL entry.
377    main_db_->DeleteURLRow(url_row.id());
378  }
379}
380
381namespace {
382
383struct ChangedURL {
384  ChangedURL() : visit_count(0), typed_count(0) {}
385  int visit_count;
386  int typed_count;
387};
388
389}  // namespace
390
391void ExpireHistoryBackend::ExpireURLsForVisits(const VisitVector& visits,
392                                               DeleteEffects* effects) {
393  // First find all unique URLs and the number of visits we're deleting for
394  // each one.
395  std::map<URLID, ChangedURL> changed_urls;
396  for (size_t i = 0; i < visits.size(); i++) {
397    ChangedURL& cur = changed_urls[visits[i].url_id];
398    // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().
399    // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
400    // typed, which would help eliminate the need for this code (we still would
401    // need to handle RELOAD transitions specially, though).
402    ui::PageTransition transition =
403        ui::PageTransitionStripQualifier(visits[i].transition);
404    if (transition != ui::PAGE_TRANSITION_RELOAD)
405      cur.visit_count++;
406    if ((transition == ui::PAGE_TRANSITION_TYPED &&
407        !ui::PageTransitionIsRedirect(visits[i].transition)) ||
408        transition == ui::PAGE_TRANSITION_KEYWORD_GENERATED)
409      cur.typed_count++;
410  }
411
412  // Check each unique URL with deleted visits.
413  HistoryClient* history_client = GetHistoryClient();
414  for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
415       i != changed_urls.end(); ++i) {
416    // The unique URL rows should already be filled in.
417    URLRow& url_row = effects->affected_urls[i->first];
418    if (!url_row.id())
419      continue;  // URL row doesn't exist in the database.
420
421    // Check if there are any other visits for this URL and update the time
422    // (the time change may not actually be synced to disk below when we're
423    // archiving).
424    VisitRow last_visit;
425    if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
426      url_row.set_last_visit(last_visit.visit_time);
427    else
428      url_row.set_last_visit(base::Time());
429
430    // Don't delete URLs with visits still in the DB, or bookmarked.
431    bool is_bookmarked =
432        (history_client && history_client->IsBookmarked(url_row.url()));
433    if (!is_bookmarked && url_row.last_visit().is_null()) {
434      // Not bookmarked and no more visits. Nuke the url.
435      DeleteOneURL(url_row, is_bookmarked, effects);
436    } else {
437      // NOTE: The calls to std::max() below are a backstop, but they should
438      // never actually be needed unless the database is corrupt (I think).
439      url_row.set_visit_count(
440          std::max(0, url_row.visit_count() - i->second.visit_count));
441      url_row.set_typed_count(
442          std::max(0, url_row.typed_count() - i->second.typed_count));
443
444      // Update the db with the new details.
445      main_db_->UpdateURLRow(url_row.id(), url_row);
446
447      effects->modified_urls.push_back(url_row);
448    }
449  }
450}
451
452void ExpireHistoryBackend::ScheduleExpire() {
453  base::TimeDelta delay;
454  if (work_queue_.empty()) {
455    // If work queue is empty, reset the work queue to contain all tasks and
456    // schedule next iteration after a longer delay.
457    InitWorkQueue();
458    delay = base::TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
459  } else {
460    delay = base::TimeDelta::FromSeconds(kExpirationDelaySec);
461  }
462
463  base::MessageLoop::current()->PostDelayedTask(
464      FROM_HERE,
465      base::Bind(&ExpireHistoryBackend::DoExpireIteration,
466                 weak_factory_.GetWeakPtr()),
467      delay);
468}
469
470void ExpireHistoryBackend::DoExpireIteration() {
471  DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
472
473  const ExpiringVisitsReader* reader = work_queue_.front();
474  bool more_to_expire = ExpireSomeOldHistory(
475      GetCurrentExpirationTime(), reader, kNumExpirePerIteration);
476
477  work_queue_.pop();
478  // If there are more items to expire, add the reader back to the queue, thus
479  // creating a new task for future iterations.
480  if (more_to_expire)
481    work_queue_.push(reader);
482
483  ScheduleExpire();
484}
485
486bool ExpireHistoryBackend::ExpireSomeOldHistory(
487    base::Time end_time,
488    const ExpiringVisitsReader* reader,
489    int max_visits) {
490  if (!main_db_)
491    return false;
492
493  // Add an extra time unit to given end time, because
494  // GetAllVisitsInRange, et al. queries' end value is non-inclusive.
495  base::Time effective_end_time =
496      base::Time::FromInternalValue(end_time.ToInternalValue() + 1);
497
498  VisitVector deleted_visits;
499  bool more_to_expire = reader->Read(effective_end_time, main_db_,
500                                     &deleted_visits, max_visits);
501
502  DeleteEffects deleted_effects;
503  DeleteVisitRelatedInfo(deleted_visits, &deleted_effects);
504  ExpireURLsForVisits(deleted_visits, &deleted_effects);
505  DeleteFaviconsIfPossible(&deleted_effects);
506
507  BroadcastNotifications(&deleted_effects, DELETION_EXPIRED);
508
509  return more_to_expire;
510}
511
512void ExpireHistoryBackend::ParanoidExpireHistory() {
513  // TODO(brettw): Bug 1067331: write this to clean up any errors.
514}
515
516HistoryClient* ExpireHistoryBackend::GetHistoryClient() {
517  // We use the history client to determine if a URL is bookmarked. The data is
518  // loaded on a separate thread and may not be done by the time we get here.
519  // We therefore block until the bookmarks have finished loading.
520  if (history_client_)
521    history_client_->BlockUntilBookmarksLoaded();
522  return history_client_;
523}
524
525}  // namespace history
526