1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include "chrome/browser/history/typed_url_syncable_service.h"

#include <algorithm>

#include "base/auto_reset.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/history/history_backend.h"
#include "net/base/net_util.h"
#include "sync/protocol/sync.pb.h"
#include "sync/protocol/typed_url_specifics.pb.h"
15
namespace {

// Cap on the number of visits written into one typed URL sync node. The
// server backend can't handle arbitrarily large node sizes, so the visit
// array is bounded.
const int kMaxTypedUrlVisits = 100;

// Cap on the number of visits fetched from the history DB for one typed URL.
// The DB itself puts no limit on visits per URL, so the fetch is bounded to
// avoid crashes due to running out of memory (http://crbug.com/89793). This is
// intentionally different from kMaxTypedUrlVisits: some of the fetched visits
// may be RELOAD visits, which are stripped before syncing.
const int kMaxVisitsToFetch = 1000;

// Sync updates are throttled for any URL whose typed_count is >= this value.
const int kTypedUrlVisitThrottleThreshold = 10;

// Throttling multiple. With a multiple of N, only every Nth update is synced
// (i.e. when typed_count % N == 0).
const int kTypedUrlVisitThrottleMultiple = 10;

}  // namespace
38
39namespace history {
40
// Tag string passed as the first argument to
// syncer::SyncData::CreateLocalData() for every typed URL change built by
// AddTypedUrlToChangeList().
const char kTypedUrlTag[] = "google_chrome_typed_urls";
42
43static bool CheckVisitOrdering(const VisitVector& visits) {
44  int64 previous_visit_time = 0;
45  for (VisitVector::const_iterator visit = visits.begin();
46       visit != visits.end(); ++visit) {
47    if (visit != visits.begin()) {
48      // We allow duplicate visits here - they shouldn't really be allowed, but
49      // they still seem to show up sometimes and we haven't figured out the
50      // source, so we just log an error instead of failing an assertion.
51      // (http://crbug.com/91473).
52      if (previous_visit_time == visit->visit_time.ToInternalValue())
53        DVLOG(1) << "Duplicate visit time encountered";
54      else if (previous_visit_time > visit->visit_time.ToInternalValue())
55        return false;
56    }
57
58    previous_visit_time = visit->visit_time.ToInternalValue();
59  }
60  return true;
61}
62
63TypedUrlSyncableService::TypedUrlSyncableService(
64    HistoryBackend* history_backend)
65    : history_backend_(history_backend),
66      processing_syncer_changes_(false),
67      expected_loop_(base::MessageLoop::current()) {
68  DCHECK(history_backend_);
69  DCHECK(expected_loop_ == base::MessageLoop::current());
70}
71
// The service is expected to be destroyed on the same message loop that was
// current when it was constructed (|expected_loop_|); the DCHECK below
// verifies that.
TypedUrlSyncableService::~TypedUrlSyncableService() {
  DCHECK(expected_loop_ == base::MessageLoop::current());
}
75
76syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing(
77    syncer::ModelType type,
78    const syncer::SyncDataList& initial_sync_data,
79    scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
80    scoped_ptr<syncer::SyncErrorFactory> error_handler) {
81  DCHECK(expected_loop_ == base::MessageLoop::current());
82  DCHECK(!sync_processor_.get());
83  DCHECK(sync_processor.get());
84  DCHECK(error_handler.get());
85  DCHECK_EQ(type, syncer::TYPED_URLS);
86
87  syncer::SyncMergeResult merge_result(type);
88  sync_processor_ = sync_processor.Pass();
89  sync_error_handler_ = error_handler.Pass();
90
91  // TODO(mgist): Add implementation
92
93  return merge_result;
94}
95
// Stops syncing typed URLs by releasing the change processor and the error
// factory that were taken in MergeDataAndStartSyncing(). |type| must be
// syncer::TYPED_URLS. Once |sync_processor_| is reset, the OnUrls*() /
// OnUrlVisited() notifications return early without producing sync changes.
void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) {
  DCHECK(expected_loop_ == base::MessageLoop::current());
  DCHECK_EQ(type, syncer::TYPED_URLS);

  sync_processor_.reset();
  sync_error_handler_.reset();
}
103
104syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData(
105    syncer::ModelType type) const {
106  DCHECK(expected_loop_ == base::MessageLoop::current());
107  syncer::SyncDataList list;
108
109  // TODO(mgist): Add implementation
110
111  return list;
112}
113
114syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges(
115    const tracked_objects::Location& from_here,
116    const syncer::SyncChangeList& change_list) {
117  DCHECK(expected_loop_ == base::MessageLoop::current());
118
119  // TODO(mgist): Add implementation
120
121  return syncer::SyncError(FROM_HERE,
122                           syncer::SyncError::DATATYPE_ERROR,
123                           "Typed url syncable service is not implemented.",
124                           syncer::TYPED_URLS);
125}
126
127void TypedUrlSyncableService::OnUrlsModified(URLRows* changed_urls) {
128  DCHECK(expected_loop_ == base::MessageLoop::current());
129  DCHECK(changed_urls);
130
131  if (processing_syncer_changes_)
132    return;  // These are changes originating from us, ignore.
133  if (!sync_processor_.get())
134    return;  // Sync processor not yet initialized, don't sync.
135
136  // Create SyncChangeList.
137  syncer::SyncChangeList changes;
138
139  for (URLRows::iterator url = changed_urls->begin();
140       url != changed_urls->end(); ++url) {
141    // Only care if the modified URL is typed.
142    if (url->typed_count() > 0) {
143      // If there were any errors updating the sync node, just ignore them and
144      // continue on to process the next URL.
145      CreateOrUpdateSyncNode(*url, &changes);
146    }
147  }
148
149  // Send SyncChangeList to server if there are any changes.
150  if (changes.size() > 0)
151    sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
152}
153
154void TypedUrlSyncableService::OnUrlVisited(ui::PageTransition transition,
155                                           URLRow* row) {
156  DCHECK(expected_loop_ == base::MessageLoop::current());
157  DCHECK(row);
158
159  if (processing_syncer_changes_)
160    return;  // These are changes originating from us, ignore.
161  if (!sync_processor_.get())
162    return;  // Sync processor not yet initialized, don't sync.
163  if (!ShouldSyncVisit(transition, row))
164    return;
165
166  // Create SyncChangeList.
167  syncer::SyncChangeList changes;
168
169  CreateOrUpdateSyncNode(*row, &changes);
170
171  // Send SyncChangeList to server if there are any changes.
172  if (changes.size() > 0)
173    sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
174}
175
176void TypedUrlSyncableService::OnUrlsDeleted(bool all_history,
177                                            bool expired,
178                                            URLRows* rows) {
179  DCHECK(expected_loop_ == base::MessageLoop::current());
180
181  if (processing_syncer_changes_)
182    return;  // These are changes originating from us, ignore.
183  if (!sync_processor_.get())
184    return;  // Sync processor not yet initialized, don't sync.
185
186  // Ignore URLs expired due to old age (we don't want to sync them as deletions
187  // to avoid extra traffic up to the server, and also to make sure that a
188  // client with a bad clock setting won't go on an expiration rampage and
189  // delete all history from every client). The server will gracefully age out
190  // the sync DB entries when they've been idle for long enough.
191  if (expired)
192    return;
193
194  // Create SyncChangeList.
195  syncer::SyncChangeList changes;
196
197  if (all_history) {
198    // Delete all synced typed urls.
199    for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin();
200         url != synced_typed_urls_.end(); ++url) {
201      VisitVector visits;
202      URLRow row(*url);
203      AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
204                              row, visits, url->spec(), &changes);
205    }
206    // Clear cache of server state.
207    synced_typed_urls_.clear();
208  } else {
209    DCHECK(rows);
210    // Delete rows.
211    for (URLRows::const_iterator row = rows->begin();
212         row != rows->end(); ++row) {
213      // Add specifics to change list for all synced urls that were deleted.
214      if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) {
215        VisitVector visits;
216        AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
217                                *row, visits, row->url().spec(), &changes);
218        // Delete typed url from cache.
219        synced_typed_urls_.erase(row->url());
220      }
221    }
222  }
223
224  // Send SyncChangeList to server if there are any changes.
225  if (changes.size() > 0)
226    sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
227}
228
229bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) {
230  // Ignore empty URLs. Not sure how this can happen (maybe import from other
231  // busted browsers, or misuse of the history API, or just plain bugs) but we
232  // can't deal with them.
233  if (url.spec().empty())
234    return true;
235
236  // Ignore local file URLs.
237  if (url.SchemeIsFile())
238    return true;
239
240  // Ignore localhost URLs.
241  if (net::IsLocalhost(url.host()))
242    return true;
243
244  return false;
245}
246
247bool TypedUrlSyncableService::ShouldSyncVisit(
248    ui::PageTransition page_transition,
249    URLRow* row) {
250  if (!row)
251    return false;
252  int typed_count = row->typed_count();
253  ui::PageTransition transition = ui::PageTransitionFromInt(
254      page_transition & ui::PAGE_TRANSITION_CORE_MASK);
255
256  // Just use an ad-hoc criteria to determine whether to ignore this
257  // notification. For most users, the distribution of visits is roughly a bell
258  // curve with a long tail - there are lots of URLs with < 5 visits so we want
259  // to make sure we sync up every visit to ensure the proper ordering of
260  // suggestions. But there are relatively few URLs with > 10 visits, and those
261  // tend to be more broadly distributed such that there's no need to sync up
262  // every visit to preserve their relative ordering.
263  return (transition == ui::PAGE_TRANSITION_TYPED &&
264          typed_count > 0 &&
265          (typed_count < kTypedUrlVisitThrottleThreshold ||
266           (typed_count % kTypedUrlVisitThrottleMultiple) == 0));
267}
268
269bool TypedUrlSyncableService::CreateOrUpdateSyncNode(
270    URLRow url,
271    syncer::SyncChangeList* changes) {
272  DCHECK_GT(url.typed_count(), 0);
273
274  if (ShouldIgnoreUrl(url.url()))
275    return true;
276
277  // Get the visits for this node.
278  VisitVector visit_vector;
279  if (!FixupURLAndGetVisits(&url, &visit_vector)) {
280    DLOG(ERROR) << "Could not load visits for url: " << url.url();
281    return false;
282  }
283  DCHECK(!visit_vector.empty());
284
285  std::string title = url.url().spec();
286  syncer::SyncChange::SyncChangeType change_type;
287
288  // If server already has URL, then send a sync update, else add it.
289  change_type =
290      (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ?
291      syncer::SyncChange::ACTION_UPDATE :
292      syncer::SyncChange::ACTION_ADD;
293
294  // Ensure cache of server state is up to date.
295  synced_typed_urls_.insert(url.url());
296
297  AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes);
298
299  return true;
300}
301
302void TypedUrlSyncableService::AddTypedUrlToChangeList(
303    syncer::SyncChange::SyncChangeType change_type,
304    const URLRow& row,
305    const VisitVector& visits,
306    std::string title,
307    syncer::SyncChangeList* change_list) {
308  sync_pb::EntitySpecifics entity_specifics;
309  sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url();
310
311  if (change_type == syncer::SyncChange::ACTION_DELETE) {
312    typed_url->set_url(row.url().spec());
313  } else {
314    WriteToTypedUrlSpecifics(row, visits, typed_url);
315  }
316
317  change_list->push_back(
318      syncer::SyncChange(FROM_HERE, change_type,
319                         syncer::SyncData::CreateLocalData(
320                             kTypedUrlTag, title, entity_specifics)));
321}
322
323void TypedUrlSyncableService::WriteToTypedUrlSpecifics(
324    const URLRow& url,
325    const VisitVector& visits,
326    sync_pb::TypedUrlSpecifics* typed_url) {
327
328  DCHECK(!url.last_visit().is_null());
329  DCHECK(!visits.empty());
330  DCHECK_EQ(url.last_visit().ToInternalValue(),
331            visits.back().visit_time.ToInternalValue());
332
333  typed_url->set_url(url.url().spec());
334  typed_url->set_title(base::UTF16ToUTF8(url.title()));
335  typed_url->set_hidden(url.hidden());
336
337  DCHECK(CheckVisitOrdering(visits));
338
339  bool only_typed = false;
340  int skip_count = 0;
341
342  if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
343    int typed_count = 0;
344    int total = 0;
345    // Walk the passed-in visit vector and count the # of typed visits.
346    for (VisitVector::const_iterator visit = visits.begin();
347         visit != visits.end(); ++visit) {
348      ui::PageTransition transition = ui::PageTransitionFromInt(
349          visit->transition & ui::PAGE_TRANSITION_CORE_MASK);
350      // We ignore reload visits.
351      if (transition == ui::PAGE_TRANSITION_RELOAD)
352        continue;
353      ++total;
354      if (transition == ui::PAGE_TRANSITION_TYPED)
355        ++typed_count;
356    }
357    // We should have at least one typed visit. This can sometimes happen if
358    // the history DB has an inaccurate count for some reason (there's been
359    // bugs in the history code in the past which has left users in the wild
360    // with incorrect counts - http://crbug.com/84258).
361    DCHECK(typed_count > 0);
362
363    if (typed_count > kMaxTypedUrlVisits) {
364      only_typed = true;
365      skip_count = typed_count - kMaxTypedUrlVisits;
366    } else if (total > kMaxTypedUrlVisits) {
367      skip_count = total - kMaxTypedUrlVisits;
368    }
369  }
370
371  for (VisitVector::const_iterator visit = visits.begin();
372       visit != visits.end(); ++visit) {
373    ui::PageTransition transition =
374        ui::PageTransitionStripQualifier(visit->transition);
375    // Skip reload visits.
376    if (transition == ui::PAGE_TRANSITION_RELOAD)
377      continue;
378
379    // If we only have room for typed visits, then only add typed visits.
380    if (only_typed && transition != ui::PAGE_TRANSITION_TYPED)
381      continue;
382
383    if (skip_count > 0) {
384      // We have too many entries to fit, so we need to skip the oldest ones.
385      // Only skip typed URLs if there are too many typed URLs to fit.
386      if (only_typed || transition != ui::PAGE_TRANSITION_TYPED) {
387        --skip_count;
388        continue;
389      }
390    }
391    typed_url->add_visits(visit->visit_time.ToInternalValue());
392    typed_url->add_visit_transitions(visit->transition);
393  }
394  DCHECK_EQ(skip_count, 0);
395
396  if (typed_url->visits_size() == 0) {
397    // If we get here, it's because we don't actually have any TYPED visits
398    // even though the visit's typed_count > 0 (corrupted typed_count). So
399    // let's go ahead and add a RELOAD visit at the most recent visit since
400    // it's not legal to have an empty visit array (yet another workaround
401    // for http://crbug.com/84258).
402    typed_url->add_visits(url.last_visit().ToInternalValue());
403    typed_url->add_visit_transitions(ui::PAGE_TRANSITION_RELOAD);
404  }
405  CHECK_GT(typed_url->visits_size(), 0);
406  CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
407  CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
408}
409
410bool TypedUrlSyncableService::FixupURLAndGetVisits(
411    URLRow* url,
412    VisitVector* visits) {
413  ++num_db_accesses_;
414  CHECK(history_backend_);
415  if (!history_backend_->GetMostRecentVisitsForURL(
416          url->id(), kMaxVisitsToFetch, visits)) {
417    ++num_db_errors_;
418    return false;
419  }
420
421  // Sometimes (due to a bug elsewhere in the history or sync code, or due to
422  // a crash between adding a URL to the history database and updating the
423  // visit DB) the visit vector for a URL can be empty. If this happens, just
424  // create a new visit whose timestamp is the same as the last_visit time.
425  // This is a workaround for http://crbug.com/84258.
426  if (visits->empty()) {
427    DVLOG(1) << "Found empty visits for URL: " << url->url();
428    VisitRow visit(
429        url->id(), url->last_visit(), 0, ui::PAGE_TRANSITION_TYPED, 0);
430    visits->push_back(visit);
431  }
432
433  // GetMostRecentVisitsForURL() returns the data in the opposite order that
434  // we need it, so reverse it.
435  std::reverse(visits->begin(), visits->end());
436
437  // Sometimes, the last_visit field in the URL doesn't match the timestamp of
438  // the last visit in our visit array (they come from different tables, so
439  // crashes/bugs can cause them to mismatch), so just set it here.
440  url->set_last_visit(visits->back().visit_time);
441  DCHECK(CheckVisitOrdering(*visits));
442  return true;
443}
444
445}  // namespace history
446