1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/top_sites_impl.h"
6
7#include <algorithm>
8#include <set>
9
10#include "base/bind.h"
11#include "base/bind_helpers.h"
12#include "base/logging.h"
13#include "base/md5.h"
14#include "base/memory/ref_counted_memory.h"
15#include "base/message_loop/message_loop_proxy.h"
16#include "base/metrics/histogram.h"
17#include "base/prefs/pref_service.h"
18#include "base/prefs/scoped_user_pref_update.h"
19#include "base/strings/string_util.h"
20#include "base/strings/utf_string_conversions.h"
21#include "base/task_runner.h"
22#include "base/values.h"
23#include "chrome/browser/chrome_notification_types.h"
24#include "chrome/browser/history/history_backend.h"
25#include "chrome/browser/history/history_db_task.h"
26#include "chrome/browser/history/history_notifications.h"
27#include "chrome/browser/history/history_service_factory.h"
28#include "chrome/browser/history/top_sites_cache.h"
29#include "chrome/browser/history/url_utils.h"
30#include "chrome/browser/profiles/profile.h"
31#include "chrome/common/pref_names.h"
32#include "components/history/core/browser/page_usage_data.h"
33#include "components/history/core/common/thumbnail_score.h"
34#include "content/public/browser/browser_thread.h"
35#include "content/public/browser/navigation_controller.h"
36#include "content/public/browser/navigation_details.h"
37#include "content/public/browser/navigation_entry.h"
38#include "content/public/browser/notification_service.h"
39#include "content/public/browser/web_contents.h"
40#include "ui/base/l10n/l10n_util.h"
41#include "ui/base/layout.h"
42#include "ui/base/resource/resource_bundle.h"
43#include "ui/gfx/image/image_util.h"
44
45using base::DictionaryValue;
46using content::BrowserThread;
47using content::NavigationController;
48
49namespace history {
50
51namespace {
52
53void RunOrPostGetMostVisitedURLsCallback(
54    base::TaskRunner* task_runner,
55    bool include_forced_urls,
56    const TopSitesImpl::GetMostVisitedURLsCallback& callback,
57    const MostVisitedURLList& all_urls,
58    const MostVisitedURLList& nonforced_urls) {
59  const MostVisitedURLList* urls =
60      include_forced_urls ? &all_urls : &nonforced_urls;
61  if (task_runner->RunsTasksOnCurrentThread())
62    callback.Run(*urls);
63  else
64    task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls));
65}
66
67// Compares two MostVisitedURL having a non-null |last_forced_time|.
68bool ForcedURLComparator(const MostVisitedURL& first,
69                         const MostVisitedURL& second) {
70  DCHECK(!first.last_forced_time.is_null() &&
71         !second.last_forced_time.is_null());
72  return first.last_forced_time < second.last_forced_time;
73}
74
75}  // namespace
76
77// How many non-forced top sites to store in the cache.
78static const size_t kNonForcedTopSitesNumber = 20;
79
80// How many forced top sites to store in the cache.
81static const size_t kForcedTopSitesNumber = 20;
82
83// Max number of temporary images we'll cache. See comment above
84// temp_images_ for details.
85static const size_t kMaxTempTopImages = 8;
86
87static const int kDaysOfHistory = 90;
88// Time from startup to first HistoryService query.
89static const int64 kUpdateIntervalSecs = 15;
90// Intervals between requests to HistoryService.
91static const int64 kMinUpdateIntervalMinutes = 1;
92static const int64 kMaxUpdateIntervalMinutes = 60;
93
94// Use 100 quality (highest quality) because we're very sensitive to
95// artifacts for these small sized, highly detailed images.
96static const int kTopSitesImageQuality = 100;
97
98TopSitesImpl::TopSitesImpl(Profile* profile)
99    : backend_(NULL),
100      cache_(new TopSitesCache()),
101      thread_safe_cache_(new TopSitesCache()),
102      profile_(profile),
103      last_num_urls_changed_(0),
104      loaded_(false) {
105  if (!profile_)
106    return;
107
108  if (content::NotificationService::current()) {
109    registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED,
110                   content::Source<Profile>(profile_));
111    // Listen for any nav commits. We'll ignore those not related to this
112    // profile when we get the notification.
113    registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED,
114                   content::NotificationService::AllSources());
115  }
116  for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
117    int url_id = kPrepopulatedPages[i].url_id;
118    prepopulated_page_urls_.push_back(
119        GURL(l10n_util::GetStringUTF8(url_id)));
120  }
121}
122
123void TopSitesImpl::Init(const base::FilePath& db_name) {
124  // Create the backend here, rather than in the constructor, so that
125  // unit tests that do not need the backend can run without a problem.
126  backend_ = new TopSitesBackend;
127  backend_->Init(db_name);
128  backend_->GetMostVisitedThumbnails(
129      base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails,
130                 base::Unretained(this)),
131      &cancelable_task_tracker_);
132}
133
134bool TopSitesImpl::SetPageThumbnail(const GURL& url,
135                                    const gfx::Image& thumbnail,
136                                    const ThumbnailScore& score) {
137  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
138
139  if (!loaded_) {
140    // TODO(sky): I need to cache these and apply them after the load
141    // completes.
142    return false;
143  }
144
145  bool add_temp_thumbnail = false;
146  if (!IsKnownURL(url)) {
147    if (!IsNonForcedFull()) {
148      add_temp_thumbnail = true;
149    } else {
150      return false;  // This URL is not known to us.
151    }
152  }
153
154  if (!HistoryService::CanAddURL(url))
155    return false;  // It's not a real webpage.
156
157  scoped_refptr<base::RefCountedBytes> thumbnail_data;
158  if (!EncodeBitmap(thumbnail, &thumbnail_data))
159    return false;
160
161  if (add_temp_thumbnail) {
162    // Always remove the existing entry and then add it back. That way if we end
163    // up with too many temp thumbnails we'll prune the oldest first.
164    RemoveTemporaryThumbnailByURL(url);
165    AddTemporaryThumbnail(url, thumbnail_data.get(), score);
166    return true;
167  }
168
169  return SetPageThumbnailEncoded(url, thumbnail_data.get(), score);
170}
171
172bool TopSitesImpl::SetPageThumbnailToJPEGBytes(
173    const GURL& url,
174    const base::RefCountedMemory* memory,
175    const ThumbnailScore& score) {
176  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
177
178  if (!loaded_) {
179    // TODO(sky): I need to cache these and apply them after the load
180    // completes.
181    return false;
182  }
183
184  bool add_temp_thumbnail = false;
185  if (!IsKnownURL(url)) {
186    if (!IsNonForcedFull()) {
187      add_temp_thumbnail = true;
188    } else {
189      return false;  // This URL is not known to us.
190    }
191  }
192
193  if (!HistoryService::CanAddURL(url))
194    return false;  // It's not a real webpage.
195
196  if (add_temp_thumbnail) {
197    // Always remove the existing entry and then add it back. That way if we end
198    // up with too many temp thumbnails we'll prune the oldest first.
199    RemoveTemporaryThumbnailByURL(url);
200    AddTemporaryThumbnail(url, memory, score);
201    return true;
202  }
203
204  return SetPageThumbnailEncoded(url, memory, score);
205}
206
207// WARNING: this function may be invoked on any thread.
208void TopSitesImpl::GetMostVisitedURLs(
209    const GetMostVisitedURLsCallback& callback,
210    bool include_forced_urls) {
211  MostVisitedURLList filtered_urls;
212  {
213    base::AutoLock lock(lock_);
214    if (!loaded_) {
215      // A request came in before we finished loading. Store the callback and
216      // we'll run it on current thread when we finish loading.
217      pending_callbacks_.push_back(
218          base::Bind(&RunOrPostGetMostVisitedURLsCallback,
219                     base::MessageLoopProxy::current(),
220                     include_forced_urls,
221                     callback));
222      return;
223    }
224    if (include_forced_urls) {
225      filtered_urls = thread_safe_cache_->top_sites();
226    } else {
227      filtered_urls.assign(thread_safe_cache_->top_sites().begin() +
228                              thread_safe_cache_->GetNumForcedURLs(),
229                           thread_safe_cache_->top_sites().end());
230    }
231  }
232  callback.Run(filtered_urls);
233}
234
235bool TopSitesImpl::GetPageThumbnail(
236    const GURL& url,
237    bool prefix_match,
238    scoped_refptr<base::RefCountedMemory>* bytes) {
239  // WARNING: this may be invoked on any thread.
240  // Perform exact match.
241  {
242    base::AutoLock lock(lock_);
243    if (thread_safe_cache_->GetPageThumbnail(url, bytes))
244      return true;
245  }
246
247  // Resource bundle is thread safe.
248  for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
249    if (url == prepopulated_page_urls_[i]) {
250      *bytes = ResourceBundle::GetSharedInstance().
251          LoadDataResourceBytesForScale(
252              kPrepopulatedPages[i].thumbnail_id,
253              ui::SCALE_FACTOR_100P);
254      return true;
255    }
256  }
257
258  if (prefix_match) {
259    // If http or https, search with |url| first, then try the other one.
260    std::vector<GURL> url_list;
261    url_list.push_back(url);
262    if (url.SchemeIsHTTPOrHTTPS())
263      url_list.push_back(ToggleHTTPAndHTTPS(url));
264
265    for (std::vector<GURL>::iterator it = url_list.begin();
266         it != url_list.end(); ++it) {
267      base::AutoLock lock(lock_);
268
269      GURL canonical_url;
270      // Test whether any stored URL is a prefix of |url|.
271      canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it);
272      if (!canonical_url.is_empty() &&
273          thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) {
274        return true;
275      }
276    }
277  }
278
279  return false;
280}
281
282bool TopSitesImpl::GetPageThumbnailScore(const GURL& url,
283                                         ThumbnailScore* score) {
284  // WARNING: this may be invoked on any thread.
285  base::AutoLock lock(lock_);
286  return thread_safe_cache_->GetPageThumbnailScore(url, score);
287}
288
289bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url,
290                                                  ThumbnailScore* score) {
291  for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
292       ++i) {
293    if (i->first == url) {
294      *score = i->second.thumbnail_score;
295      return true;
296    }
297  }
298  return false;
299}
300
301
302// Returns the index of |url| in |urls|, or -1 if not found.
303static int IndexOf(const MostVisitedURLList& urls, const GURL& url) {
304  for (size_t i = 0; i < urls.size(); i++) {
305    if (urls[i].url == url)
306      return i;
307  }
308  return -1;
309}
310
311void TopSitesImpl::SyncWithHistory() {
312  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
313  if (loaded_ && temp_images_.size()) {
314    // If we have temporary thumbnails it means there isn't much data, and most
315    // likely the user is first running Chrome. During this time we throttle
316    // updating from history by 30 seconds. If the user creates a new tab page
317    // during this window of time we force updating from history so that the new
318    // tab page isn't so far out of date.
319    timer_.Stop();
320    StartQueryForMostVisited();
321  }
322}
323
324bool TopSitesImpl::HasBlacklistedItems() const {
325  const base::DictionaryValue* blacklist =
326      profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
327  return blacklist && !blacklist->empty();
328}
329
330void TopSitesImpl::AddBlacklistedURL(const GURL& url) {
331  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
332
333  base::Value* dummy = base::Value::CreateNullValue();
334  {
335    DictionaryPrefUpdate update(profile_->GetPrefs(),
336                                prefs::kNtpMostVisitedURLsBlacklist);
337    base::DictionaryValue* blacklist = update.Get();
338    blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy);
339  }
340
341  ResetThreadSafeCache();
342  NotifyTopSitesChanged();
343}
344
345void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) {
346  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
347  {
348    DictionaryPrefUpdate update(profile_->GetPrefs(),
349                                prefs::kNtpMostVisitedURLsBlacklist);
350    base::DictionaryValue* blacklist = update.Get();
351    blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL);
352  }
353  ResetThreadSafeCache();
354  NotifyTopSitesChanged();
355}
356
357bool TopSitesImpl::IsBlacklisted(const GURL& url) {
358  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
359  const base::DictionaryValue* blacklist =
360      profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
361  return blacklist && blacklist->HasKey(GetURLHash(url));
362}
363
364void TopSitesImpl::ClearBlacklistedURLs() {
365  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
366  {
367    DictionaryPrefUpdate update(profile_->GetPrefs(),
368                                prefs::kNtpMostVisitedURLsBlacklist);
369    base::DictionaryValue* blacklist = update.Get();
370    blacklist->Clear();
371  }
372  ResetThreadSafeCache();
373  NotifyTopSitesChanged();
374}
375
376void TopSitesImpl::Shutdown() {
377  profile_ = NULL;
378  // Cancel all requests so that the service doesn't callback to us after we've
379  // invoked Shutdown (this could happen if we have a pending request and
380  // Shutdown is invoked).
381  cancelable_task_tracker_.TryCancelAll();
382  backend_->Shutdown();
383}
384
385// static
386void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list,
387                                   const MostVisitedURLList& new_list,
388                                   TopSitesDelta* delta) {
389
390  // Add all the old URLs for quick lookup. This maps URLs to the corresponding
391  // index in the input.
392  std::map<GURL, size_t> all_old_urls;
393  size_t num_old_forced = 0;
394  for (size_t i = 0; i < old_list.size(); i++) {
395    if (!old_list[i].last_forced_time.is_null())
396      num_old_forced++;
397    DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced)
398        << "Forced URLs must all appear before non-forced URLs.";
399    all_old_urls[old_list[i].url] = i;
400  }
401
402  // Check all the URLs in the new set to see which ones are new or just moved.
403  // When we find a match in the old set, we'll reset its index to our special
404  // marker. This allows us to quickly identify the deleted ones in a later
405  // pass.
406  const size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
407  int rank = -1;  // Forced URLs have a rank of -1.
408  for (size_t i = 0; i < new_list.size(); i++) {
409    // Increase the rank if we're going through forced URLs. This works because
410    // non-forced URLs all come after forced URLs.
411    if (new_list[i].last_forced_time.is_null())
412      rank++;
413    DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1))
414        << "Forced URLs must all appear before non-forced URLs.";
415    std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url);
416    if (found == all_old_urls.end()) {
417      MostVisitedURLWithRank added;
418      added.url = new_list[i];
419      added.rank = rank;
420      delta->added.push_back(added);
421    } else {
422      DCHECK(found->second != kAlreadyFoundMarker)
423          << "Same URL appears twice in the new list.";
424      int old_rank = found->second >= num_old_forced ?
425          found->second - num_old_forced : -1;
426      if (old_rank != rank ||
427          old_list[found->second].last_forced_time !=
428              new_list[i].last_forced_time) {
429        MostVisitedURLWithRank moved;
430        moved.url = new_list[i];
431        moved.rank = rank;
432        delta->moved.push_back(moved);
433      }
434      found->second = kAlreadyFoundMarker;
435    }
436  }
437
438  // Any member without the special marker in the all_old_urls list means that
439  // there wasn't a "new" URL that mapped to it, so it was deleted.
440  for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin();
441       i != all_old_urls.end(); ++i) {
442    if (i->second != kAlreadyFoundMarker)
443      delta->deleted.push_back(old_list[i->second]);
444  }
445}
446
447base::CancelableTaskTracker::TaskId TopSitesImpl::StartQueryForMostVisited() {
448  DCHECK(loaded_);
449  if (!profile_)
450    return base::CancelableTaskTracker::kBadTaskId;
451
452  HistoryService* hs = HistoryServiceFactory::GetForProfile(
453      profile_, Profile::EXPLICIT_ACCESS);
454  // |hs| may be null during unit tests.
455  if (hs) {
456    return hs->QueryMostVisitedURLs(
457        num_results_to_request_from_history(),
458        kDaysOfHistory,
459        base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory,
460                   base::Unretained(this)),
461        &cancelable_task_tracker_);
462  }
463  return base::CancelableTaskTracker::kBadTaskId;
464}
465
466bool TopSitesImpl::IsKnownURL(const GURL& url) {
467  return loaded_ && cache_->IsKnownURL(url);
468}
469
470const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const {
471  return cache_->GetCanonicalURL(url).spec();
472}
473
474bool TopSitesImpl::IsNonForcedFull() {
475  return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber;
476}
477
478bool TopSitesImpl::IsForcedFull() {
479  return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber;
480}
481
482TopSitesImpl::~TopSitesImpl() {
483}
484
485bool TopSitesImpl::SetPageThumbnailNoDB(
486    const GURL& url,
487    const base::RefCountedMemory* thumbnail_data,
488    const ThumbnailScore& score) {
489  // This should only be invoked when we know about the url.
490  DCHECK(cache_->IsKnownURL(url));
491
492  const MostVisitedURL& most_visited =
493      cache_->top_sites()[cache_->GetURLIndex(url)];
494  Images* image = cache_->GetImage(url);
495
496  // When comparing the thumbnail scores, we need to take into account the
497  // redirect hops, which are not generated when the thumbnail is because the
498  // redirects weren't known. We fill that in here since we know the redirects.
499  ThumbnailScore new_score_with_redirects(score);
500  new_score_with_redirects.redirect_hops_from_dest =
501      GetRedirectDistanceForURL(most_visited, url);
502
503  if (!ShouldReplaceThumbnailWith(image->thumbnail_score,
504                                  new_score_with_redirects) &&
505      image->thumbnail.get())
506    return false;  // The one we already have is better.
507
508  image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data);
509  image->thumbnail_score = new_score_with_redirects;
510
511  ResetThreadSafeImageCache();
512  return true;
513}
514
515bool TopSitesImpl::SetPageThumbnailEncoded(
516    const GURL& url,
517    const base::RefCountedMemory* thumbnail,
518    const ThumbnailScore& score) {
519  if (!SetPageThumbnailNoDB(url, thumbnail, score))
520    return false;
521
522  // Update the database.
523  if (!cache_->IsKnownURL(url))
524    return false;
525
526  size_t index = cache_->GetURLIndex(url);
527  int url_rank = index - cache_->GetNumForcedURLs();
528  const MostVisitedURL& most_visited = cache_->top_sites()[index];
529  backend_->SetPageThumbnail(most_visited,
530                             url_rank < 0 ? -1 : url_rank,
531                             *(cache_->GetImage(most_visited.url)));
532  return true;
533}
534
535// static
536bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap,
537                                scoped_refptr<base::RefCountedBytes>* bytes) {
538  if (bitmap.IsEmpty())
539    return false;
540  *bytes = new base::RefCountedBytes();
541  std::vector<unsigned char> data;
542  if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data))
543    return false;
544
545  // As we're going to cache this data, make sure the vector is only as big as
546  // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity().
547  // (In a C++0x future, we can just call shrink_to_fit() in Encode())
548  (*bytes)->data() = data;
549  return true;
550}
551
552void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) {
553  for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
554       ++i) {
555    if (i->first == url) {
556      temp_images_.erase(i);
557      return;
558    }
559  }
560}
561
562void TopSitesImpl::AddTemporaryThumbnail(
563    const GURL& url,
564    const base::RefCountedMemory* thumbnail,
565    const ThumbnailScore& score) {
566  if (temp_images_.size() == kMaxTempTopImages)
567    temp_images_.erase(temp_images_.begin());
568
569  TempImage image;
570  image.first = url;
571  image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail);
572  image.second.thumbnail_score = score;
573  temp_images_.push_back(image);
574}
575
576void TopSitesImpl::TimerFired() {
577  StartQueryForMostVisited();
578}
579
580// static
581int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
582                                            const GURL& url) {
583  for (size_t i = 0; i < most_visited.redirects.size(); i++) {
584    if (most_visited.redirects[i] == url)
585      return static_cast<int>(most_visited.redirects.size() - i - 1);
586  }
587  NOTREACHED() << "URL should always be found.";
588  return 0;
589}
590
591MostVisitedURLList TopSitesImpl::GetPrepopulatePages() {
592  MostVisitedURLList urls;
593  urls.resize(arraysize(kPrepopulatedPages));
594  for (size_t i = 0; i < urls.size(); ++i) {
595    MostVisitedURL& url = urls[i];
596    url.url = GURL(prepopulated_page_urls_[i]);
597    url.redirects.push_back(url.url);
598    url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id);
599  }
600  return urls;
601}
602
603bool TopSitesImpl::loaded() const {
604  return loaded_;
605}
606
607bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) {
608  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
609  size_t num_forced = cache_->GetNumForcedURLs();
610  MostVisitedURLList new_list(cache_->top_sites());
611  MostVisitedURL new_url;
612
613  if (cache_->IsKnownURL(url)) {
614    size_t index = cache_->GetURLIndex(url);
615    // Do nothing if we currently have that URL as non-forced.
616    if (new_list[index].last_forced_time.is_null())
617      return false;
618
619    // Update the |last_forced_time| of the already existing URL. Delete it and
620    // reinsert it at the right location.
621    new_url = new_list[index];
622    new_list.erase(new_list.begin() + index);
623    num_forced--;
624  } else {
625    new_url.url = url;
626    new_url.redirects.push_back(url);
627  }
628  new_url.last_forced_time = time;
629  // Add forced URLs and sort. Added to the end of the list of forced URLs
630  // since this is almost always where it needs to go, unless the user's local
631  // clock is fiddled with.
632  MostVisitedURLList::iterator mid = new_list.begin() + num_forced;
633  new_list.insert(mid, new_url);
634  mid = new_list.begin() + num_forced;  // Mid was invalidated.
635  std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator);
636  SetTopSites(new_list);
637  return true;
638}
639
640bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls,
641                                        size_t num_forced_urls) {
642  bool added = false;
643  MostVisitedURLList prepopulate_urls = GetPrepopulatePages();
644  for (size_t i = 0; i < prepopulate_urls.size(); ++i) {
645    if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber &&
646        IndexOf(*urls, prepopulate_urls[i].url) == -1) {
647      urls->push_back(prepopulate_urls[i]);
648      added = true;
649    }
650  }
651  return added;
652}
653
654size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) {
655  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
656  // Add all the new URLs for quick lookup. Take that opportunity to count the
657  // number of forced URLs in |new_list|.
658  std::set<GURL> all_new_urls;
659  size_t num_forced = 0;
660  for (size_t i = 0; i < new_list->size(); ++i) {
661    for (size_t j = 0; j < (*new_list)[i].redirects.size(); j++) {
662      all_new_urls.insert((*new_list)[i].redirects[j]);
663    }
664    if (!(*new_list)[i].last_forced_time.is_null())
665      ++num_forced;
666  }
667
668  // Keep the forced URLs from |cache_| that are not found in |new_list|.
669  MostVisitedURLList filtered_forced_urls;
670  for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) {
671    if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end())
672      filtered_forced_urls.push_back(cache_->top_sites()[i]);
673  }
674  num_forced += filtered_forced_urls.size();
675
676  // Prepend forced URLs and sort in order of ascending |last_forced_time|.
677  new_list->insert(new_list->begin(), filtered_forced_urls.begin(),
678                   filtered_forced_urls.end());
679  std::inplace_merge(
680      new_list->begin(), new_list->begin() + filtered_forced_urls.size(),
681      new_list->begin() + num_forced, ForcedURLComparator);
682
683  // Drop older forced URLs if the list overflows. Since forced URLs are always
684  // sort in increasing order of |last_forced_time|, drop the first ones.
685  if (num_forced > kForcedTopSitesNumber) {
686    new_list->erase(new_list->begin(),
687                    new_list->begin() + (num_forced - kForcedTopSitesNumber));
688    num_forced = kForcedTopSitesNumber;
689  }
690
691  return num_forced;
692}
693
694void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls,
695                                  MostVisitedURLList* out) {
696  // Log the number of times ApplyBlacklist is called so we can compute the
697  // average number of blacklisted items per user.
698  const base::DictionaryValue* blacklist =
699      profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
700  UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true);
701  UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems",
702      (blacklist ? blacklist->size() : 0));
703  size_t num_non_forced_urls = 0;
704  size_t num_forced_urls = 0;
705  for (size_t i = 0; i < urls.size(); ++i) {
706    if (!IsBlacklisted(urls[i].url)) {
707      if (urls[i].last_forced_time.is_null()) {
708        // Non-forced URL.
709        if (num_non_forced_urls >= kNonForcedTopSitesNumber)
710          continue;
711        num_non_forced_urls++;
712      } else {
713        // Forced URL.
714        if (num_forced_urls >= kForcedTopSitesNumber)
715          continue;
716        num_forced_urls++;
717      }
718      out->push_back(urls[i]);
719    }
720  }
721}
722
723std::string TopSitesImpl::GetURLHash(const GURL& url) {
724  // We don't use canonical URLs here to be able to blacklist only one of
725  // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'.
726  return base::MD5String(url.spec());
727}
728
729base::TimeDelta TopSitesImpl::GetUpdateDelay() {
730  if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages))
731    return base::TimeDelta::FromSeconds(30);
732
733  int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes;
734  int64 minutes = kMaxUpdateIntervalMinutes -
735      last_num_urls_changed_ * range / cache_->top_sites().size();
736  return base::TimeDelta::FromMinutes(minutes);
737}
738
739void TopSitesImpl::Observe(int type,
740                           const content::NotificationSource& source,
741                           const content::NotificationDetails& details) {
742  if (!loaded_)
743    return;
744
745  if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) {
746    content::Details<history::URLsDeletedDetails> deleted_details(details);
747    if (deleted_details->all_history) {
748      SetTopSites(MostVisitedURLList());
749      backend_->ResetDatabase();
750    } else {
751      std::set<size_t> indices_to_delete;  // Indices into top_sites_.
752      for (URLRows::const_iterator i = deleted_details->rows.begin();
753           i != deleted_details->rows.end(); ++i) {
754        if (cache_->IsKnownURL(i->url()))
755          indices_to_delete.insert(cache_->GetURLIndex(i->url()));
756      }
757
758      if (indices_to_delete.empty())
759        return;
760
761      MostVisitedURLList new_top_sites(cache_->top_sites());
762      for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin();
763           i != indices_to_delete.rend(); i++) {
764        new_top_sites.erase(new_top_sites.begin() + *i);
765      }
766      SetTopSites(new_top_sites);
767    }
768    StartQueryForMostVisited();
769  } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) {
770    NavigationController* controller =
771        content::Source<NavigationController>(source).ptr();
772    Profile* profile = Profile::FromBrowserContext(
773        controller->GetWebContents()->GetBrowserContext());
774    if (profile == profile_ && !IsNonForcedFull()) {
775      content::LoadCommittedDetails* load_details =
776          content::Details<content::LoadCommittedDetails>(details).ptr();
777      if (!load_details)
778        return;
779      const GURL& url = load_details->entry->GetURL();
780      if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) {
781        // To avoid slamming history we throttle requests when the url updates.
782        // To do otherwise negatively impacts perf tests.
783        RestartQueryForTopSitesTimer(GetUpdateDelay());
784      }
785    }
786  }
787}
788
789void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) {
790  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
791
792  MostVisitedURLList top_sites(new_top_sites);
793  size_t num_forced_urls = MergeCachedForcedURLs(&top_sites);
794  AddPrepopulatedPages(&top_sites, num_forced_urls);
795
796  TopSitesDelta delta;
797  DiffMostVisited(cache_->top_sites(), top_sites, &delta);
798  if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) {
799    backend_->UpdateTopSites(delta);
800  }
801
802  last_num_urls_changed_ = delta.added.size() + delta.moved.size();
803
804  // We always do the following steps (setting top sites in cache, and resetting
805  // thread safe cache ...) as this method is invoked during startup at which
806  // point the caches haven't been updated yet.
807  cache_->SetTopSites(top_sites);
808
809  // See if we have any tmp thumbnails for the new sites.
810  if (!temp_images_.empty()) {
811    for (size_t i = 0; i < top_sites.size(); ++i) {
812      const MostVisitedURL& mv = top_sites[i];
813      GURL canonical_url = cache_->GetCanonicalURL(mv.url);
814      // At the time we get the thumbnail redirects aren't known, so we have to
815      // iterate through all the images.
816      for (TempImages::iterator it = temp_images_.begin();
817           it != temp_images_.end(); ++it) {
818        if (canonical_url == cache_->GetCanonicalURL(it->first)) {
819          SetPageThumbnailEncoded(
820              mv.url, it->second.thumbnail.get(), it->second.thumbnail_score);
821          temp_images_.erase(it);
822          break;
823        }
824      }
825    }
826  }
827
828  if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber)
829    temp_images_.clear();
830
831  ResetThreadSafeCache();
832  ResetThreadSafeImageCache();
833  NotifyTopSitesChanged();
834
835  // Restart the timer that queries history for top sites. This is done to
836  // ensure we stay in sync with history.
837  RestartQueryForTopSitesTimer(GetUpdateDelay());
838}
839
840int TopSitesImpl::num_results_to_request_from_history() const {
841  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
842
843  const base::DictionaryValue* blacklist =
844      profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
845  return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0);
846}
847
848void TopSitesImpl::MoveStateToLoaded() {
849  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
850
851  MostVisitedURLList filtered_urls_all;
852  MostVisitedURLList filtered_urls_nonforced;
853  PendingCallbacks pending_callbacks;
854  {
855    base::AutoLock lock(lock_);
856
857    if (loaded_)
858      return;  // Don't do anything if we're already loaded.
859    loaded_ = true;
860
861    // Now that we're loaded we can service the queued up callbacks. Copy them
862    // here and service them outside the lock.
863    if (!pending_callbacks_.empty()) {
864      // We always filter out forced URLs because callers of GetMostVisitedURLs
865      // are not interested in them.
866      filtered_urls_all = thread_safe_cache_->top_sites();
867      filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() +
868                                       thread_safe_cache_->GetNumForcedURLs(),
869                                     thread_safe_cache_->top_sites().end());
870      pending_callbacks.swap(pending_callbacks_);
871    }
872  }
873
874  for (size_t i = 0; i < pending_callbacks.size(); i++)
875    pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced);
876
877  NotifyTopSitesLoaded();
878}
879
880void TopSitesImpl::ResetThreadSafeCache() {
881  base::AutoLock lock(lock_);
882  MostVisitedURLList cached;
883  ApplyBlacklist(cache_->top_sites(), &cached);
884  thread_safe_cache_->SetTopSites(cached);
885}
886
887void TopSitesImpl::ResetThreadSafeImageCache() {
888  base::AutoLock lock(lock_);
889  thread_safe_cache_->SetThumbnails(cache_->images());
890}
891
892void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) {
893  if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) <
894                             (base::TimeTicks::Now() + delta))) {
895    return;
896  }
897
898  timer_start_time_ = base::TimeTicks::Now();
899  timer_.Stop();
900  timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired);
901}
902
903void TopSitesImpl::OnGotMostVisitedThumbnails(
904    const scoped_refptr<MostVisitedThumbnails>& thumbnails) {
905  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
906
907  // Set the top sites directly in the cache so that SetTopSites diffs
908  // correctly.
909  cache_->SetTopSites(thumbnails->most_visited);
910  SetTopSites(thumbnails->most_visited);
911  cache_->SetThumbnails(thumbnails->url_to_images_map);
912
913  ResetThreadSafeImageCache();
914
915  MoveStateToLoaded();
916
917  // Start a timer that refreshes top sites from history.
918  RestartQueryForTopSitesTimer(
919      base::TimeDelta::FromSeconds(kUpdateIntervalSecs));
920}
921
922void TopSitesImpl::OnTopSitesAvailableFromHistory(
923    const MostVisitedURLList* pages) {
924  DCHECK(pages);
925  SetTopSites(*pages);
926}
927
928}  // namespace history
929