1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/visitedlink/browser/visitedlink_master.h"
6
7#if defined(OS_WIN)
8#include <windows.h>
9#include <io.h>
10#include <shlobj.h>
11#endif  // defined(OS_WIN)
12#include <stdio.h>
13
14#include <algorithm>
15
16#include "base/bind.h"
17#include "base/bind_helpers.h"
18#include "base/containers/stack_container.h"
19#include "base/files/file_util.h"
20#include "base/files/scoped_file.h"
21#include "base/logging.h"
22#include "base/message_loop/message_loop.h"
23#include "base/path_service.h"
24#include "base/rand_util.h"
25#include "base/strings/string_util.h"
26#include "base/threading/thread_restrictions.h"
27#include "components/visitedlink/browser/visitedlink_delegate.h"
28#include "components/visitedlink/browser/visitedlink_event_listener.h"
29#include "content/public/browser/browser_context.h"
30#include "content/public/browser/browser_thread.h"
31#include "url/gurl.h"
32
33using content::BrowserThread;
34
35namespace visitedlink {
36
37const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
38const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
39const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
40const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
41const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
42
43const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
44
45// the signature at the beginning of the URL table = "VLnk" (visited links)
46const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
47const size_t VisitedLinkMaster::kFileHeaderSize =
48    kFileHeaderSaltOffset + LINK_SALT_LENGTH;
49
50// This value should also be the same as the smallest size in the lookup
51// table in NewTableSizeForCount (prime number).
52const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
53
54const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
55
56namespace {
57
58// Fills the given salt structure with some quasi-random values
59// It is not necessary to generate a cryptographically strong random string,
60// only that it be reasonably different for different users.
61void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
62  DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
63  uint64 randval = base::RandUint64();
64  memcpy(salt, &randval, 8);
65}
66
67// Opens file on a background thread to not block UI thread.
68void AsyncOpen(FILE** file, const base::FilePath& filename) {
69  *file = base::OpenFile(filename, "wb+");
70  DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
71}
72
73// Returns true if the write was complete.
74static bool WriteToFile(FILE* file,
75                        off_t offset,
76                        const void* data,
77                        size_t data_len) {
78  if (fseek(file, offset, SEEK_SET) != 0)
79    return false;  // Don't write to an invalid part of the file.
80
81  size_t num_written = fwrite(data, 1, data_len, file);
82
83  // The write may not make it to the kernel (stdlib may buffer the write)
84  // until the next fseek/fclose call.  If we crash, it's easy for our used
85  // item count to be out of sync with the number of hashes we write.
86  // Protect against this by calling fflush.
87  int ret = fflush(file);
88  DCHECK_EQ(0, ret);
89  return num_written == data_len;
90}
91
92// This task executes on a background thread and executes a write. This
93// prevents us from blocking the UI thread doing I/O. Double pointer to FILE
94// is used because file may still not be opened by the time of scheduling
95// the task for execution.
96void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
97  if (*file)
98    WriteToFile(*file, offset, data.data(), data.size());
99}
100
101// Truncates the file to the current position asynchronously on a background
102// thread. Double pointer to FILE is used because file may still not be opened
103// by the time of scheduling the task for execution.
104void AsyncTruncate(FILE** file) {
105  if (*file)
106    base::IgnoreResult(base::TruncateFile(*file));
107}
108
109// Closes the file on a background thread and releases memory used for storage
110// of FILE* value. Double pointer to FILE is used because file may still not
111// be opened by the time of scheduling the task for execution.
112void AsyncClose(FILE** file) {
113  if (*file)
114    base::IgnoreResult(fclose(*file));
115  free(file);
116}
117
118}  // namespace
119
120// TableBuilder ---------------------------------------------------------------
121
122// How rebuilding from history works
123// ---------------------------------
124//
125// We mark that we're rebuilding from history by setting the table_builder_
126// member in VisitedLinkMaster to the TableBuilder we create. This builder
127// will be called on the history thread by the history system for every URL
128// in the database.
129//
130// The builder will store the fingerprints for those URLs, and then marshalls
131// back to the main thread where the VisitedLinkMaster will be notified. The
132// master then replaces its table with a new table containing the computed
133// fingerprints.
134//
135// The builder must remain active while the history system is using it.
136// Sometimes, the master will be deleted before the rebuild is complete, in
137// which case it notifies the builder via DisownMaster(). The builder will
138// delete itself once rebuilding is complete, and not execute any callback.
139class VisitedLinkMaster::TableBuilder
140    : public VisitedLinkDelegate::URLEnumerator {
141 public:
142  TableBuilder(VisitedLinkMaster* master,
143               const uint8 salt[LINK_SALT_LENGTH]);
144
145  // Called on the main thread when the master is being destroyed. This will
146  // prevent a crash when the query completes and the master is no longer
147  // around. We can not actually do anything but mark this fact, since the
148  // table will be being rebuilt simultaneously on the other thread.
149  void DisownMaster();
150
151  // VisitedLinkDelegate::URLEnumerator
152  virtual void OnURL(const GURL& url) OVERRIDE;
153  virtual void OnComplete(bool succeed) OVERRIDE;
154
155 private:
156  virtual ~TableBuilder() {}
157
158  // OnComplete mashals to this function on the main thread to do the
159  // notification.
160  void OnCompleteMainThread();
161
162  // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
163  VisitedLinkMaster* master_;
164
165  // Indicates whether the operation has failed or not.
166  bool success_;
167
168  // Salt for this new table.
169  uint8 salt_[LINK_SALT_LENGTH];
170
171  // Stores the fingerprints we computed on the background thread.
172  VisitedLinkCommon::Fingerprints fingerprints_;
173
174  DISALLOW_COPY_AND_ASSIGN(TableBuilder);
175};
176
177// VisitedLinkMaster ----------------------------------------------------------
178
179VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
180                                     VisitedLinkDelegate* delegate,
181                                     bool persist_to_disk)
182    : browser_context_(browser_context),
183      delegate_(delegate),
184      listener_(new VisitedLinkEventListener(this, browser_context)),
185      persist_to_disk_(persist_to_disk) {
186  InitMembers();
187}
188
189VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
190                                     VisitedLinkDelegate* delegate,
191                                     bool persist_to_disk,
192                                     bool suppress_rebuild,
193                                     const base::FilePath& filename,
194                                     int32 default_table_size)
195    : browser_context_(NULL),
196      delegate_(delegate),
197      persist_to_disk_(persist_to_disk) {
198  listener_.reset(listener);
199  DCHECK(listener_.get());
200  InitMembers();
201
202  database_name_override_ = filename;
203  table_size_override_ = default_table_size;
204  suppress_rebuild_ = suppress_rebuild;
205}
206
207VisitedLinkMaster::~VisitedLinkMaster() {
208  if (table_builder_.get()) {
209    // Prevent the table builder from calling us back now that we're being
210    // destroyed. Note that we DON'T delete the object, since the history
211    // system is still writing into it. When that is complete, the table
212    // builder will destroy itself when it finds we are gone.
213    table_builder_->DisownMaster();
214  }
215  FreeURLTable();
216  // FreeURLTable() will schedule closing of the file and deletion of |file_|.
217  // So nothing should be done here.
218}
219
220void VisitedLinkMaster::InitMembers() {
221  file_ = NULL;
222  shared_memory_ = NULL;
223  shared_memory_serial_ = 0;
224  used_items_ = 0;
225  table_size_override_ = 0;
226  suppress_rebuild_ = false;
227  sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
228
229#ifndef NDEBUG
230  posted_asynchronous_operation_ = false;
231#endif
232}
233
234bool VisitedLinkMaster::Init() {
235  // We probably shouldn't be loading this from the UI thread,
236  // but it does need to happen early on in startup.
237  //   http://code.google.com/p/chromium/issues/detail?id=24163
238  base::ThreadRestrictions::ScopedAllowIO allow_io;
239
240  if (persist_to_disk_) {
241    if (InitFromFile())
242      return true;
243  }
244  return InitFromScratch(suppress_rebuild_);
245}
246
247VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
248  // Extra check that we are not incognito. This should not happen.
249  // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
250  // removed from BrowserContext.
251  if (browser_context_ && browser_context_->IsOffTheRecord()) {
252    NOTREACHED();
253    return null_hash_;
254  }
255
256  if (!url.is_valid())
257    return null_hash_;  // Don't add invalid URLs.
258
259  Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
260                                                  url.spec().size(),
261                                                  salt_);
262  if (table_builder_.get()) {
263    // If we have a pending delete for this fingerprint, cancel it.
264    std::set<Fingerprint>::iterator found =
265        deleted_since_rebuild_.find(fingerprint);
266    if (found != deleted_since_rebuild_.end())
267        deleted_since_rebuild_.erase(found);
268
269    // A rebuild is in progress, save this addition in the temporary list so
270    // it can be added once rebuild is complete.
271    added_since_rebuild_.insert(fingerprint);
272  }
273
274  // If the table is "full", we don't add URLs and just drop them on the floor.
275  // This can happen if we get thousands of new URLs and something causes
276  // the table resizing to fail. This check prevents a hang in that case. Note
277  // that this is *not* the resize limit, this is just a sanity check.
278  if (used_items_ / 8 > table_length_ / 10)
279    return null_hash_;  // Table is more than 80% full.
280
281  return AddFingerprint(fingerprint, true);
282}
283
284void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
285                                   const base::Closure& task) {
286  DCHECK(persist_to_disk_);
287  BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
288                                                            from_here, task);
289}
290
291void VisitedLinkMaster::AddURL(const GURL& url) {
292  Hash index = TryToAddURL(url);
293  if (!table_builder_.get() && index != null_hash_) {
294    // Not rebuilding, so we want to keep the file on disk up-to-date.
295    if (persist_to_disk_) {
296      WriteUsedItemCountToFile();
297      WriteHashRangeToFile(index, index);
298    }
299    ResizeTableIfNecessary();
300  }
301}
302
303void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
304  for (std::vector<GURL>::const_iterator i = url.begin();
305       i != url.end(); ++i) {
306    Hash index = TryToAddURL(*i);
307    if (!table_builder_.get() && index != null_hash_)
308      ResizeTableIfNecessary();
309  }
310
311  // Keeps the file on disk up-to-date.
312  if (!table_builder_.get() && persist_to_disk_)
313    WriteFullTable();
314}
315
316void VisitedLinkMaster::DeleteAllURLs() {
317  // Any pending modifications are invalid.
318  added_since_rebuild_.clear();
319  deleted_since_rebuild_.clear();
320
321  // Clear the hash table.
322  used_items_ = 0;
323  memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
324
325  // Resize it if it is now too empty. Resize may write the new table out for
326  // us, otherwise, schedule writing the new table to disk ourselves.
327  if (!ResizeTableIfNecessary() && persist_to_disk_)
328    WriteFullTable();
329
330  listener_->Reset();
331}
332
333VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
334  return delegate_;
335}
336
337void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
338  if (!urls->HasNextURL())
339    return;
340
341  listener_->Reset();
342
343  if (table_builder_.get()) {
344    // A rebuild is in progress, save this deletion in the temporary list so
345    // it can be added once rebuild is complete.
346    while (urls->HasNextURL()) {
347      const GURL& url(urls->NextURL());
348      if (!url.is_valid())
349        continue;
350
351      Fingerprint fingerprint =
352          ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
353      deleted_since_rebuild_.insert(fingerprint);
354
355      // If the URL was just added and now we're deleting it, it may be in the
356      // list of things added since the last rebuild. Delete it from that list.
357      std::set<Fingerprint>::iterator found =
358          added_since_rebuild_.find(fingerprint);
359      if (found != added_since_rebuild_.end())
360        added_since_rebuild_.erase(found);
361
362      // Delete the URLs from the in-memory table, but don't bother writing
363      // to disk since it will be replaced soon.
364      DeleteFingerprint(fingerprint, false);
365    }
366    return;
367  }
368
369  // Compute the deleted URLs' fingerprints and delete them
370  std::set<Fingerprint> deleted_fingerprints;
371  while (urls->HasNextURL()) {
372    const GURL& url(urls->NextURL());
373    if (!url.is_valid())
374      continue;
375    deleted_fingerprints.insert(
376        ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
377  }
378  DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
379}
380
381// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
382VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
383    Fingerprint fingerprint,
384    bool send_notifications) {
385  if (!hash_table_ || table_length_ == 0) {
386    NOTREACHED();  // Not initialized.
387    return null_hash_;
388  }
389
390  Hash cur_hash = HashFingerprint(fingerprint);
391  Hash first_hash = cur_hash;
392  while (true) {
393    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
394    if (cur_fingerprint == fingerprint)
395      return null_hash_;  // This fingerprint is already in there, do nothing.
396
397    if (cur_fingerprint == null_fingerprint_) {
398      // End of probe sequence found, insert here.
399      hash_table_[cur_hash] = fingerprint;
400      used_items_++;
401      // If allowed, notify listener that a new visited link was added.
402      if (send_notifications)
403        listener_->Add(fingerprint);
404      return cur_hash;
405    }
406
407    // Advance in the probe sequence.
408    cur_hash = IncrementHash(cur_hash);
409    if (cur_hash == first_hash) {
410      // This means that we've wrapped around and are about to go into an
411      // infinite loop. Something was wrong with the hashtable resizing
412      // logic, so stop here.
413      NOTREACHED();
414      return null_hash_;
415    }
416  }
417}
418
419void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
420    const std::set<Fingerprint>& fingerprints) {
421  bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
422
423  // Delete the URLs from the table.
424  for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
425       i != fingerprints.end(); ++i)
426    DeleteFingerprint(*i, !bulk_write);
427
428  // These deleted fingerprints may make us shrink the table.
429  if (ResizeTableIfNecessary())
430    return;  // The resize function wrote the new table to disk for us.
431
432  // Nobody wrote this out for us, write the full file to disk.
433  if (bulk_write && persist_to_disk_)
434    WriteFullTable();
435}
436
437bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
438                                          bool update_file) {
439  if (!hash_table_ || table_length_ == 0) {
440    NOTREACHED();  // Not initialized.
441    return false;
442  }
443  if (!IsVisited(fingerprint))
444    return false;  // Not in the database to delete.
445
446  // First update the header used count.
447  used_items_--;
448  if (update_file && persist_to_disk_)
449    WriteUsedItemCountToFile();
450
451  Hash deleted_hash = HashFingerprint(fingerprint);
452
453  // Find the range of "stuff" in the hash table that is adjacent to this
454  // fingerprint. These are things that could be affected by the change in
455  // the hash table. Since we use linear probing, anything after the deleted
456  // item up until an empty item could be affected.
457  Hash end_range = deleted_hash;
458  while (true) {
459    Hash next_hash = IncrementHash(end_range);
460    if (next_hash == deleted_hash)
461      break;  // We wrapped around and the whole table is full.
462    if (!hash_table_[next_hash])
463      break;  // Found the last spot.
464    end_range = next_hash;
465  }
466
467  // We could get all fancy and move the affected fingerprints around, but
468  // instead we just remove them all and re-add them (minus our deleted one).
469  // This will mean there's a small window of time where the affected links
470  // won't be marked visited.
471  base::StackVector<Fingerprint, 32> shuffled_fingerprints;
472  Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
473  for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
474    if (hash_table_[i] != fingerprint) {
475      // Don't save the one we're deleting!
476      shuffled_fingerprints->push_back(hash_table_[i]);
477
478      // This will balance the increment of this value in AddFingerprint below
479      // so there is no net change.
480      used_items_--;
481    }
482    hash_table_[i] = null_fingerprint_;
483  }
484
485  if (!shuffled_fingerprints->empty()) {
486    // Need to add the new items back.
487    for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
488      AddFingerprint(shuffled_fingerprints[i], false);
489  }
490
491  // Write the affected range to disk [deleted_hash, end_range].
492  if (update_file && persist_to_disk_)
493    WriteHashRangeToFile(deleted_hash, end_range);
494
495  return true;
496}
497
498void VisitedLinkMaster::WriteFullTable() {
499  // This function can get called when the file is open, for example, when we
500  // resize the table. We must handle this case and not try to reopen the file,
501  // since there may be write operations pending on the file I/O thread.
502  //
503  // Note that once we start writing, we do not delete on error. This means
504  // there can be a partial file, but the short file will be detected next time
505  // we start, and will be replaced.
506  //
507  // This might possibly get corrupted if we crash in the middle of writing.
508  // We should pick up the most common types of these failures when we notice
509  // that the file size is different when we load it back in, and then we will
510  // regenerate the table.
511  DCHECK(persist_to_disk_);
512
513  if (!file_) {
514    file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
515    base::FilePath filename;
516    GetDatabaseFileName(&filename);
517    PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
518  }
519
520  // Write the new header.
521  int32 header[4];
522  header[0] = kFileSignature;
523  header[1] = kFileCurrentVersion;
524  header[2] = table_length_;
525  header[3] = used_items_;
526  WriteToFile(file_, 0, header, sizeof(header));
527  WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
528
529  // Write the hash data.
530  WriteToFile(file_, kFileHeaderSize,
531              hash_table_, table_length_ * sizeof(Fingerprint));
532
533  // The hash table may have shrunk, so make sure this is the end.
534  PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
535}
536
537bool VisitedLinkMaster::InitFromFile() {
538  DCHECK(file_ == NULL);
539  DCHECK(persist_to_disk_);
540
541  base::FilePath filename;
542  GetDatabaseFileName(&filename);
543  base::ScopedFILE file_closer(base::OpenFile(filename, "rb+"));
544  if (!file_closer.get())
545    return false;
546
547  int32 num_entries, used_count;
548  if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
549    return false;  // Header isn't valid.
550
551  // Allocate and read the table.
552  if (!CreateURLTable(num_entries, false))
553    return false;
554  if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
555                    hash_table_, num_entries * sizeof(Fingerprint))) {
556    FreeURLTable();
557    return false;
558  }
559  used_items_ = used_count;
560
561#ifndef NDEBUG
562  DebugValidate();
563#endif
564
565  file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
566  *file_ = file_closer.release();
567  return true;
568}
569
570bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
571  int32 table_size = kDefaultTableSize;
572  if (table_size_override_)
573    table_size = table_size_override_;
574
575  // The salt must be generated before the table so that it can be copied to
576  // the shared memory.
577  GenerateSalt(salt_);
578  if (!CreateURLTable(table_size, true))
579    return false;
580
581#ifndef NDEBUG
582  DebugValidate();
583#endif
584
585  if (suppress_rebuild && persist_to_disk_) {
586    // When we disallow rebuilds (normally just unit tests), just use the
587    // current empty table.
588    WriteFullTable();
589    return true;
590  }
591
592  // This will build the table from history. On the first run, history will
593  // be empty, so this will be correct. This will also write the new table
594  // to disk. We don't want to save explicitly here, since the rebuild may
595  // not complete, leaving us with an empty but valid visited link database.
596  // In the future, we won't know we need to try rebuilding again.
597  return RebuildTableFromDelegate();
598}
599
600bool VisitedLinkMaster::ReadFileHeader(FILE* file,
601                                       int32* num_entries,
602                                       int32* used_count,
603                                       uint8 salt[LINK_SALT_LENGTH]) {
604  DCHECK(persist_to_disk_);
605
606  // Get file size.
607  // Note that there is no need to seek back to the original location in the
608  // file since ReadFromFile() [which is the next call accessing the file]
609  // seeks before reading.
610  if (fseek(file, 0, SEEK_END) == -1)
611    return false;
612  size_t file_size = ftell(file);
613
614  if (file_size <= kFileHeaderSize)
615    return false;
616
617  uint8 header[kFileHeaderSize];
618  if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
619    return false;
620
621  // Verify the signature.
622  int32 signature;
623  memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
624  if (signature != kFileSignature)
625    return false;
626
627  // Verify the version is up-to-date. As with other read errors, a version
628  // mistmatch will trigger a rebuild of the database from history, which will
629  // have the effect of migrating the database.
630  int32 version;
631  memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
632  if (version != kFileCurrentVersion)
633    return false;  // Bad version.
634
635  // Read the table size and make sure it matches the file size.
636  memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
637  if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
638    return false;  // Bad size.
639
640  // Read the used item count.
641  memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
642  if (*used_count > *num_entries)
643    return false;  // Bad used item count;
644
645  // Read the salt.
646  memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
647
648  // This file looks OK from the header's perspective.
649  return true;
650}
651
652bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
653  if (!database_name_override_.empty()) {
654    // use this filename, the directory must exist
655    *filename = database_name_override_;
656    return true;
657  }
658
659  if (!browser_context_ || browser_context_->GetPath().empty())
660    return false;
661
662  base::FilePath profile_dir = browser_context_->GetPath();
663  *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
664  return true;
665}
666
667// Initializes the shared memory structure. The salt should already be filled
668// in so that it can be written to the shared memory
669bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
670  // The table is the size of the table followed by the entries.
671  uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
672
673  // Create the shared memory object.
674  shared_memory_ = new base::SharedMemory();
675  if (!shared_memory_)
676    return false;
677
678  base::SharedMemoryCreateOptions options;
679  options.size = alloc_size;
680  options.share_read_only = true;
681
682  if (!shared_memory_->Create(options) || !shared_memory_->Map(alloc_size)) {
683    delete shared_memory_;
684    shared_memory_ = NULL;
685    return false;
686  }
687
688  if (init_to_empty) {
689    memset(shared_memory_->memory(), 0, alloc_size);
690    used_items_ = 0;
691  }
692  table_length_ = num_entries;
693
694  // Save the header for other processes to read.
695  SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
696  header->length = table_length_;
697  memcpy(header->salt, salt_, LINK_SALT_LENGTH);
698
699  // Our table pointer is just the data immediately following the size.
700  hash_table_ = reinterpret_cast<Fingerprint*>(
701      static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
702
703  return true;
704}
705
706bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
707  base::SharedMemory *old_shared_memory = shared_memory_;
708  Fingerprint* old_hash_table = hash_table_;
709  int32 old_table_length = table_length_;
710  if (!CreateURLTable(num_entries, true)) {
711    // Try to put back the old state.
712    shared_memory_ = old_shared_memory;
713    hash_table_ = old_hash_table;
714    table_length_ = old_table_length;
715    return false;
716  }
717
718#ifndef NDEBUG
719  DebugValidate();
720#endif
721
722  return true;
723}
724
725void VisitedLinkMaster::FreeURLTable() {
726  if (shared_memory_) {
727    delete shared_memory_;
728    shared_memory_ = NULL;
729  }
730  if (!persist_to_disk_ || !file_)
731    return;
732  PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
733  // AsyncClose() will close the file and free the memory pointed by |file_|.
734  file_ = NULL;
735}
736
737bool VisitedLinkMaster::ResizeTableIfNecessary() {
738  DCHECK(table_length_ > 0) << "Must have a table";
739
740  // Load limits for good performance/space. We are pretty conservative about
741  // keeping the table not very full. This is because we use linear probing
742  // which increases the likelihood of clumps of entries which will reduce
743  // performance.
744  const float max_table_load = 0.5f;  // Grow when we're > this full.
745  const float min_table_load = 0.2f;  // Shrink when we're < this full.
746
747  float load = ComputeTableLoad();
748  if (load < max_table_load &&
749      (table_length_ <= static_cast<float>(kDefaultTableSize) ||
750       load > min_table_load))
751    return false;
752
753  // Table needs to grow or shrink.
754  int new_size = NewTableSizeForCount(used_items_);
755  DCHECK(new_size > used_items_);
756  DCHECK(load <= min_table_load || new_size > table_length_);
757  ResizeTable(new_size);
758  return true;
759}
760
761void VisitedLinkMaster::ResizeTable(int32 new_size) {
762  DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
763  shared_memory_serial_++;
764
765#ifndef NDEBUG
766  DebugValidate();
767#endif
768
769  base::SharedMemory* old_shared_memory = shared_memory_;
770  Fingerprint* old_hash_table = hash_table_;
771  int32 old_table_length = table_length_;
772  if (!BeginReplaceURLTable(new_size))
773    return;
774
775  // Now we have two tables, our local copy which is the old one, and the new
776  // one loaded into this object where we need to copy the data.
777  for (int32 i = 0; i < old_table_length; i++) {
778    Fingerprint cur = old_hash_table[i];
779    if (cur)
780      AddFingerprint(cur, false);
781  }
782
783  // On error unmapping, just forget about it since we can't do anything
784  // else to release it.
785  delete old_shared_memory;
786
787  // Send an update notification to all child processes so they read the new
788  // table.
789  listener_->NewTable(shared_memory_);
790
791#ifndef NDEBUG
792  DebugValidate();
793#endif
794
795  // The new table needs to be written to disk.
796  if (persist_to_disk_)
797    WriteFullTable();
798}
799
800uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
801  // These table sizes are selected to be the maximum prime number less than
802  // a "convenient" multiple of 1K.
803  static const int table_sizes[] = {
804      16381,    // 16K  = 16384   <- don't shrink below this table size
805                //                   (should be == default_table_size)
806      32767,    // 32K  = 32768
807      65521,    // 64K  = 65536
808      130051,   // 128K = 131072
809      262127,   // 256K = 262144
810      524269,   // 512K = 524288
811      1048549,  // 1M   = 1048576
812      2097143,  // 2M   = 2097152
813      4194301,  // 4M   = 4194304
814      8388571,  // 8M   = 8388608
815      16777199,  // 16M  = 16777216
816      33554347};  // 32M  = 33554432
817
818  // Try to leave the table 33% full.
819  int desired = item_count * 3;
820
821  // Find the closest prime.
822  for (size_t i = 0; i < arraysize(table_sizes); i ++) {
823    if (table_sizes[i] > desired)
824      return table_sizes[i];
825  }
826
827  // Growing very big, just approximate a "good" number, not growing as much
828  // as normal.
829  return item_count * 2 - 1;
830}
831
832// See the TableBuilder definition in the header file for how this works.
833bool VisitedLinkMaster::RebuildTableFromDelegate() {
834  DCHECK(!table_builder_.get());
835
836  // TODO(brettw) make sure we have reasonable salt!
837  table_builder_ = new TableBuilder(this, salt_);
838  delegate_->RebuildTable(table_builder_);
839  return true;
840}
841
842// See the TableBuilder declaration above for how this works.
843void VisitedLinkMaster::OnTableRebuildComplete(
844    bool success,
845    const std::vector<Fingerprint>& fingerprints) {
846  if (success) {
847    // Replace the old table with a new blank one.
848    shared_memory_serial_++;
849
850    // We are responsible for freeing it AFTER it has been replaced if
851    // replacement succeeds.
852    base::SharedMemory* old_shared_memory = shared_memory_;
853
854    int new_table_size = NewTableSizeForCount(
855        static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
856    if (BeginReplaceURLTable(new_table_size)) {
857      // Free the old table.
858      delete old_shared_memory;
859
860      // Add the stored fingerprints to the hash table.
861      for (size_t i = 0; i < fingerprints.size(); i++)
862        AddFingerprint(fingerprints[i], false);
863
864      // Also add anything that was added while we were asynchronously
865      // generating the new table.
866      for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
867           i != added_since_rebuild_.end(); ++i)
868        AddFingerprint(*i, false);
869      added_since_rebuild_.clear();
870
871      // Now handle deletions.
872      DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
873      deleted_since_rebuild_.clear();
874
875      // Send an update notification to all child processes.
876      listener_->NewTable(shared_memory_);
877
878      if (persist_to_disk_)
879        WriteFullTable();
880    }
881  }
882  table_builder_ = NULL;  // Will release our reference to the builder.
883
884  // Notify the unit test that the rebuild is complete (will be NULL in prod.)
885  if (!rebuild_complete_task_.is_null()) {
886    rebuild_complete_task_.Run();
887    rebuild_complete_task_.Reset();
888  }
889}
890
891void VisitedLinkMaster::WriteToFile(FILE** file,
892                                    off_t offset,
893                                    void* data,
894                                    int32 data_size) {
895  DCHECK(persist_to_disk_);
896#ifndef NDEBUG
897  posted_asynchronous_operation_ = true;
898#endif
899  PostIOTask(FROM_HERE,
900      base::Bind(&AsyncWrite, file, offset,
901                 std::string(static_cast<const char*>(data), data_size)));
902}
903
904void VisitedLinkMaster::WriteUsedItemCountToFile() {
905  DCHECK(persist_to_disk_);
906  if (!file_)
907    return;  // See comment on the file_ variable for why this might happen.
908  WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
909}
910
911void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
912  DCHECK(persist_to_disk_);
913
914  if (!file_)
915    return;  // See comment on the file_ variable for why this might happen.
916  if (last_hash < first_hash) {
917    // Handle wraparound at 0. This first write is first_hash->EOF
918    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
919                &hash_table_[first_hash],
920                (table_length_ - first_hash + 1) * sizeof(Fingerprint));
921
922    // Now do 0->last_lash.
923    WriteToFile(file_, kFileHeaderSize, hash_table_,
924                (last_hash + 1) * sizeof(Fingerprint));
925  } else {
926    // Normal case, just write the range.
927    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
928                &hash_table_[first_hash],
929                (last_hash - first_hash + 1) * sizeof(Fingerprint));
930  }
931}
932
933bool VisitedLinkMaster::ReadFromFile(FILE* file,
934                                     off_t offset,
935                                     void* data,
936                                     size_t data_size) {
937  DCHECK(persist_to_disk_);
938#ifndef NDEBUG
939  // Since this function is synchronous, we require that no asynchronous
940  // operations could possibly be pending.
941  DCHECK(!posted_asynchronous_operation_);
942#endif
943
944  if (fseek(file, offset, SEEK_SET) != 0)
945    return false;
946
947  size_t num_read = fread(data, 1, data_size, file);
948  return num_read == data_size;
949}
950
951// VisitedLinkTableBuilder ----------------------------------------------------
952
953VisitedLinkMaster::TableBuilder::TableBuilder(
954    VisitedLinkMaster* master,
955    const uint8 salt[LINK_SALT_LENGTH])
956    : master_(master),
957      success_(true) {
958  fingerprints_.reserve(4096);
959  memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
960}
961
962// TODO(brettw): Do we want to try to cancel the request if this happens? It
963// could delay shutdown if there are a lot of URLs.
964void VisitedLinkMaster::TableBuilder::DisownMaster() {
965  master_ = NULL;
966}
967
968void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
969  if (!url.is_empty()) {
970    fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
971        url.spec().data(), url.spec().length(), salt_));
972  }
973}
974
975void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
976  success_ = success;
977  DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
978
979  // Marshal to the main thread to notify the VisitedLinkMaster that the
980  // rebuild is complete.
981  BrowserThread::PostTask(
982      BrowserThread::UI, FROM_HERE,
983      base::Bind(&TableBuilder::OnCompleteMainThread, this));
984}
985
986void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
987  if (master_)
988    master_->OnTableRebuildComplete(success_, fingerprints_);
989}
990
991}  // namespace visitedlink
992