1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/visitedlink/visitedlink_master.h"
6
7#if defined(OS_WIN)
8#include <windows.h>
9#include <io.h>
10#include <shlobj.h>
11#endif  // defined(OS_WIN)
12#include <stdio.h>
13
14#include <algorithm>
15
16#include "base/file_util.h"
17#include "base/logging.h"
18#include "base/message_loop.h"
19#include "base/path_service.h"
20#include "base/process_util.h"
21#include "base/rand_util.h"
22#include "base/stack_container.h"
23#include "base/string_util.h"
24#include "base/threading/thread_restrictions.h"
25#include "content/browser/browser_thread.h"
26#include "chrome/browser/history/history.h"
27#include "chrome/browser/profiles/profile.h"
28
29using file_util::ScopedFILE;
30using file_util::OpenFile;
31using file_util::TruncateFile;
32
33const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
34const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
35const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
36const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
37const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
38
39const int32 VisitedLinkMaster::kFileCurrentVersion = 2;
40
41// the signature at the beginning of the URL table = "VLnk" (visited links)
42const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
43const size_t VisitedLinkMaster::kFileHeaderSize =
44    kFileHeaderSaltOffset + LINK_SALT_LENGTH;
45
46// This value should also be the same as the smallest size in the lookup
47// table in NewTableSizeForCount (prime number).
48const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
49
50const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
51
52namespace {
53
54// Fills the given salt structure with some quasi-random values
55// It is not necessary to generate a cryptographically strong random string,
56// only that it be reasonably different for different users.
57void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
58  DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
59  uint64 randval = base::RandUint64();
60  memcpy(salt, &randval, 8);
61}
62
63// AsyncWriter ----------------------------------------------------------------
64
65// This task executes on a background thread and executes a write. This
66// prevents us from blocking the UI thread doing I/O.
67class AsyncWriter : public Task {
68 public:
69  AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len)
70      : file_(file),
71        offset_(offset) {
72    data_->resize(data_len);
73    memcpy(&*data_->begin(), data, data_len);
74  }
75
76  virtual void Run() {
77    WriteToFile(file_, offset_,
78                &*data_->begin(), static_cast<int32>(data_->size()));
79  }
80
81  // Exposed as a static so it can be called directly from the Master to
82  // reduce the number of platform-specific I/O sites we have. Returns true if
83  // the write was complete.
84  static bool WriteToFile(FILE* file,
85                          off_t offset,
86                          const void* data,
87                          size_t data_len) {
88    if (fseek(file, offset, SEEK_SET) != 0)
89      return false;  // Don't write to an invalid part of the file.
90
91    size_t num_written = fwrite(data, 1, data_len, file);
92
93    // The write may not make it to the kernel (stdlib may buffer the write)
94    // until the next fseek/fclose call.  If we crash, it's easy for our used
95    // item count to be out of sync with the number of hashes we write.
96    // Protect against this by calling fflush.
97    int ret = fflush(file);
98    DCHECK_EQ(0, ret);
99    return num_written == data_len;
100  }
101
102 private:
103  // The data to write and where to write it.
104  FILE* file_;
105  int32 offset_;  // Offset from the beginning of the file.
106
107  // Most writes are just a single fingerprint, so we reserve that much in this
108  // object to avoid mallocs in that case.
109  StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;
110
111  DISALLOW_COPY_AND_ASSIGN(AsyncWriter);
112};
113
114// Used to asynchronously set the end of the file. This must be done on the
115// same thread as the writing to keep things synchronized.
116class AsyncSetEndOfFile : public Task {
117 public:
118  explicit AsyncSetEndOfFile(FILE* file) : file_(file) {}
119
120  virtual void Run() {
121    TruncateFile(file_);
122  }
123
124 private:
125  FILE* file_;
126  DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile);
127};
128
129// Used to asynchronously close a file. This must be done on the same thread as
130// the writing to keep things synchronized.
131class AsyncCloseHandle : public Task {
132 public:
133  explicit AsyncCloseHandle(FILE* file) : file_(file) {}
134
135  virtual void Run() {
136    fclose(file_);
137  }
138
139 private:
140  FILE* file_;
141  DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle);
142};
143
144}  // namespace
145
146// TableBuilder ---------------------------------------------------------------
147
148// How rebuilding from history works
149// ---------------------------------
150//
151// We mark that we're rebuilding from history by setting the table_builder_
152// member in VisitedLinkMaster to the TableBuilder we create. This builder
153// will be called on the history thread by the history system for every URL
154// in the database.
155//
156// The builder will store the fingerprints for those URLs, and then marshalls
157// back to the main thread where the VisitedLinkMaster will be notified. The
158// master then replaces its table with a new table containing the computed
159// fingerprints.
160//
161// The builder must remain active while the history system is using it.
162// Sometimes, the master will be deleted before the rebuild is complete, in
163// which case it notifies the builder via DisownMaster(). The builder will
164// delete itself once rebuilding is complete, and not execute any callback.
165class VisitedLinkMaster::TableBuilder
166    : public HistoryService::URLEnumerator,
167      public base::RefCountedThreadSafe<TableBuilder> {
168 public:
169  TableBuilder(VisitedLinkMaster* master,
170               const uint8 salt[LINK_SALT_LENGTH]);
171
172  // Called on the main thread when the master is being destroyed. This will
173  // prevent a crash when the query completes and the master is no longer
174  // around. We can not actually do anything but mark this fact, since the
175  // table will be being rebuilt simultaneously on the other thread.
176  void DisownMaster();
177
178  // HistoryService::URLEnumerator
179  virtual void OnURL(const GURL& url);
180  virtual void OnComplete(bool succeed);
181
182 private:
183  friend class base::RefCountedThreadSafe<TableBuilder>;
184
185  ~TableBuilder() {}
186
187  // OnComplete mashals to this function on the main thread to do the
188  // notification.
189  void OnCompleteMainThread();
190
191  // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
192  VisitedLinkMaster* master_;
193
194  // Indicates whether the operation has failed or not.
195  bool success_;
196
197  // Salt for this new table.
198  uint8 salt_[LINK_SALT_LENGTH];
199
200  // Stores the fingerprints we computed on the background thread.
201  VisitedLinkCommon::Fingerprints fingerprints_;
202};
203
204// VisitedLinkMaster ----------------------------------------------------------
205
206VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
207                                     Profile* profile) {
208  InitMembers(listener, profile);
209}
210
211VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
212                                     HistoryService* history_service,
213                                     bool suppress_rebuild,
214                                     const FilePath& filename,
215                                     int32 default_table_size) {
216  InitMembers(listener, NULL);
217
218  database_name_override_ = filename;
219  table_size_override_ = default_table_size;
220  history_service_override_ = history_service;
221  suppress_rebuild_ = suppress_rebuild;
222}
223
224VisitedLinkMaster::~VisitedLinkMaster() {
225  if (table_builder_.get()) {
226    // Prevent the table builder from calling us back now that we're being
227    // destroyed. Note that we DON'T delete the object, since the history
228    // system is still writing into it. When that is complete, the table
229    // builder will destroy itself when it finds we are gone.
230    table_builder_->DisownMaster();
231  }
232  FreeURLTable();
233}
234
235void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) {
236  DCHECK(listener);
237
238  listener_ = listener;
239  file_ = NULL;
240  shared_memory_ = NULL;
241  shared_memory_serial_ = 0;
242  used_items_ = 0;
243  table_size_override_ = 0;
244  history_service_override_ = NULL;
245  suppress_rebuild_ = false;
246  profile_ = profile;
247
248#ifndef NDEBUG
249  posted_asynchronous_operation_ = false;
250#endif
251}
252
253bool VisitedLinkMaster::Init() {
254  // We probably shouldn't be loading this from the UI thread,
255  // but it does need to happen early on in startup.
256  //   http://code.google.com/p/chromium/issues/detail?id=24163
257  base::ThreadRestrictions::ScopedAllowIO allow_io;
258  if (!InitFromFile())
259    return InitFromScratch(suppress_rebuild_);
260  return true;
261}
262
263VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
264  // Extra check that we are not incognito. This should not happen.
265  if (profile_ && profile_->IsOffTheRecord()) {
266    NOTREACHED();
267    return null_hash_;
268  }
269
270  if (!url.is_valid())
271    return null_hash_;  // Don't add invalid URLs.
272
273  Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
274                                                  url.spec().size(),
275                                                  salt_);
276  if (table_builder_) {
277    // If we have a pending delete for this fingerprint, cancel it.
278    std::set<Fingerprint>::iterator found =
279        deleted_since_rebuild_.find(fingerprint);
280    if (found != deleted_since_rebuild_.end())
281        deleted_since_rebuild_.erase(found);
282
283    // A rebuild is in progress, save this addition in the temporary list so
284    // it can be added once rebuild is complete.
285    added_since_rebuild_.insert(fingerprint);
286  }
287
288  // If the table is "full", we don't add URLs and just drop them on the floor.
289  // This can happen if we get thousands of new URLs and something causes
290  // the table resizing to fail. This check prevents a hang in that case. Note
291  // that this is *not* the resize limit, this is just a sanity check.
292  if (used_items_ / 8 > table_length_ / 10)
293    return null_hash_;  // Table is more than 80% full.
294
295  return AddFingerprint(fingerprint, true);
296}
297
298void VisitedLinkMaster::AddURL(const GURL& url) {
299  Hash index = TryToAddURL(url);
300  if (!table_builder_ && index != null_hash_) {
301    // Not rebuilding, so we want to keep the file on disk up-to-date.
302    WriteUsedItemCountToFile();
303    WriteHashRangeToFile(index, index);
304    ResizeTableIfNecessary();
305  }
306}
307
308void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
309  for (std::vector<GURL>::const_iterator i = url.begin();
310       i != url.end(); ++i) {
311    Hash index = TryToAddURL(*i);
312    if (!table_builder_ && index != null_hash_)
313      ResizeTableIfNecessary();
314  }
315
316  // Keeps the file on disk up-to-date.
317  if (!table_builder_)
318    WriteFullTable();
319}
320
321void VisitedLinkMaster::DeleteAllURLs() {
322  // Any pending modifications are invalid.
323  added_since_rebuild_.clear();
324  deleted_since_rebuild_.clear();
325
326  // Clear the hash table.
327  used_items_ = 0;
328  memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
329
330  // Resize it if it is now too empty. Resize may write the new table out for
331  // us, otherwise, schedule writing the new table to disk ourselves.
332  if (!ResizeTableIfNecessary())
333    WriteFullTable();
334
335  listener_->Reset();
336}
337
338void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
339  typedef std::set<GURL>::const_iterator SetIterator;
340
341  if (urls.empty())
342    return;
343
344  listener_->Reset();
345
346  if (table_builder_) {
347    // A rebuild is in progress, save this deletion in the temporary list so
348    // it can be added once rebuild is complete.
349    for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
350      if (!i->is_valid())
351        continue;
352
353      Fingerprint fingerprint =
354          ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
355      deleted_since_rebuild_.insert(fingerprint);
356
357      // If the URL was just added and now we're deleting it, it may be in the
358      // list of things added since the last rebuild. Delete it from that list.
359      std::set<Fingerprint>::iterator found =
360          added_since_rebuild_.find(fingerprint);
361      if (found != added_since_rebuild_.end())
362        added_since_rebuild_.erase(found);
363
364      // Delete the URLs from the in-memory table, but don't bother writing
365      // to disk since it will be replaced soon.
366      DeleteFingerprint(fingerprint, false);
367    }
368    return;
369  }
370
371  // Compute the deleted URLs' fingerprints and delete them
372  std::set<Fingerprint> deleted_fingerprints;
373  for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
374    if (!i->is_valid())
375      continue;
376    deleted_fingerprints.insert(
377        ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
378  }
379  DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
380}
381
382// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
383VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
384    Fingerprint fingerprint,
385    bool send_notifications) {
386  if (!hash_table_ || table_length_ == 0) {
387    NOTREACHED();  // Not initialized.
388    return null_hash_;
389  }
390
391  Hash cur_hash = HashFingerprint(fingerprint);
392  Hash first_hash = cur_hash;
393  while (true) {
394    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
395    if (cur_fingerprint == fingerprint)
396      return null_hash_;  // This fingerprint is already in there, do nothing.
397
398    if (cur_fingerprint == null_fingerprint_) {
399      // End of probe sequence found, insert here.
400      hash_table_[cur_hash] = fingerprint;
401      used_items_++;
402      // If allowed, notify listener that a new visited link was added.
403      if (send_notifications)
404        listener_->Add(fingerprint);
405      return cur_hash;
406    }
407
408    // Advance in the probe sequence.
409    cur_hash = IncrementHash(cur_hash);
410    if (cur_hash == first_hash) {
411      // This means that we've wrapped around and are about to go into an
412      // infinite loop. Something was wrong with the hashtable resizing
413      // logic, so stop here.
414      NOTREACHED();
415      return null_hash_;
416    }
417  }
418}
419
420void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
421    const std::set<Fingerprint>& fingerprints) {
422  bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
423
424  // Delete the URLs from the table.
425  for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
426       i != fingerprints.end(); ++i)
427    DeleteFingerprint(*i, !bulk_write);
428
429  // These deleted fingerprints may make us shrink the table.
430  if (ResizeTableIfNecessary())
431    return;  // The resize function wrote the new table to disk for us.
432
433  // Nobody wrote this out for us, write the full file to disk.
434  if (bulk_write)
435    WriteFullTable();
436}
437
438bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
439                                          bool update_file) {
440  if (!hash_table_ || table_length_ == 0) {
441    NOTREACHED();  // Not initialized.
442    return false;
443  }
444  if (!IsVisited(fingerprint))
445    return false;  // Not in the database to delete.
446
447  // First update the header used count.
448  used_items_--;
449  if (update_file)
450    WriteUsedItemCountToFile();
451
452  Hash deleted_hash = HashFingerprint(fingerprint);
453
454  // Find the range of "stuff" in the hash table that is adjacent to this
455  // fingerprint. These are things that could be affected by the change in
456  // the hash table. Since we use linear probing, anything after the deleted
457  // item up until an empty item could be affected.
458  Hash end_range = deleted_hash;
459  while (true) {
460    Hash next_hash = IncrementHash(end_range);
461    if (next_hash == deleted_hash)
462      break;  // We wrapped around and the whole table is full.
463    if (!hash_table_[next_hash])
464      break;  // Found the last spot.
465    end_range = next_hash;
466  }
467
468  // We could get all fancy and move the affected fingerprints around, but
469  // instead we just remove them all and re-add them (minus our deleted one).
470  // This will mean there's a small window of time where the affected links
471  // won't be marked visited.
472  StackVector<Fingerprint, 32> shuffled_fingerprints;
473  Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
474  for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
475    if (hash_table_[i] != fingerprint) {
476      // Don't save the one we're deleting!
477      shuffled_fingerprints->push_back(hash_table_[i]);
478
479      // This will balance the increment of this value in AddFingerprint below
480      // so there is no net change.
481      used_items_--;
482    }
483    hash_table_[i] = null_fingerprint_;
484  }
485
486  if (!shuffled_fingerprints->empty()) {
487    // Need to add the new items back.
488    for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
489      AddFingerprint(shuffled_fingerprints[i], false);
490  }
491
492  // Write the affected range to disk [deleted_hash, end_range].
493  if (update_file)
494    WriteHashRangeToFile(deleted_hash, end_range);
495
496  return true;
497}
498
499bool VisitedLinkMaster::WriteFullTable() {
500  // This function can get called when the file is open, for example, when we
501  // resize the table. We must handle this case and not try to reopen the file,
502  // since there may be write operations pending on the file I/O thread.
503  //
504  // Note that once we start writing, we do not delete on error. This means
505  // there can be a partial file, but the short file will be detected next time
506  // we start, and will be replaced.
507  //
508  // This might possibly get corrupted if we crash in the middle of writing.
509  // We should pick up the most common types of these failures when we notice
510  // that the file size is different when we load it back in, and then we will
511  // regenerate the table.
512  if (!file_) {
513    FilePath filename;
514    GetDatabaseFileName(&filename);
515    file_ = OpenFile(filename, "wb+");
516    if (!file_) {
517      DLOG(ERROR) << "Failed to open file " << filename.value();
518      return false;
519    }
520  }
521
522  // Write the new header.
523  int32 header[4];
524  header[0] = kFileSignature;
525  header[1] = kFileCurrentVersion;
526  header[2] = table_length_;
527  header[3] = used_items_;
528  WriteToFile(file_, 0, header, sizeof(header));
529  WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
530
531  // Write the hash data.
532  WriteToFile(file_, kFileHeaderSize,
533              hash_table_, table_length_ * sizeof(Fingerprint));
534
535  // The hash table may have shrunk, so make sure this is the end.
536  BrowserThread::PostTask(
537      BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_));
538  return true;
539}
540
541bool VisitedLinkMaster::InitFromFile() {
542  DCHECK(file_ == NULL);
543
544  FilePath filename;
545  GetDatabaseFileName(&filename);
546  ScopedFILE file_closer(OpenFile(filename, "rb+"));
547  if (!file_closer.get())
548    return false;
549
550  int32 num_entries, used_count;
551  if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
552    return false;  // Header isn't valid.
553
554  // Allocate and read the table.
555  if (!CreateURLTable(num_entries, false))
556    return false;
557  if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
558                    hash_table_, num_entries * sizeof(Fingerprint))) {
559    FreeURLTable();
560    return false;
561  }
562  used_items_ = used_count;
563
564#ifndef NDEBUG
565  DebugValidate();
566#endif
567
568  file_ = file_closer.release();
569  return true;
570}
571
572bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
573  int32 table_size = kDefaultTableSize;
574  if (table_size_override_)
575    table_size = table_size_override_;
576
577  // The salt must be generated before the table so that it can be copied to
578  // the shared memory.
579  GenerateSalt(salt_);
580  if (!CreateURLTable(table_size, true))
581    return false;
582
583#ifndef NDEBUG
584  DebugValidate();
585#endif
586
587  if (suppress_rebuild) {
588    // When we disallow rebuilds (normally just unit tests), just use the
589    // current empty table.
590    return WriteFullTable();
591  }
592
593  // This will build the table from history. On the first run, history will
594  // be empty, so this will be correct. This will also write the new table
595  // to disk. We don't want to save explicitly here, since the rebuild may
596  // not complete, leaving us with an empty but valid visited link database.
597  // In the future, we won't know we need to try rebuilding again.
598  return RebuildTableFromHistory();
599}
600
601bool VisitedLinkMaster::ReadFileHeader(FILE* file,
602                                       int32* num_entries,
603                                       int32* used_count,
604                                       uint8 salt[LINK_SALT_LENGTH]) {
605  // Get file size.
606  // Note that there is no need to seek back to the original location in the
607  // file since ReadFromFile() [which is the next call accessing the file]
608  // seeks before reading.
609  if (fseek(file, 0, SEEK_END) == -1)
610    return false;
611  size_t file_size = ftell(file);
612
613  if (file_size <= kFileHeaderSize)
614    return false;
615
616  uint8 header[kFileHeaderSize];
617  if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
618    return false;
619
620  // Verify the signature.
621  int32 signature;
622  memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
623  if (signature != kFileSignature)
624    return false;
625
626  // Verify the version is up-to-date. As with other read errors, a version
627  // mistmatch will trigger a rebuild of the database from history, which will
628  // have the effect of migrating the database.
629  int32 version;
630  memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
631  if (version != kFileCurrentVersion)
632    return false;  // Bad version.
633
634  // Read the table size and make sure it matches the file size.
635  memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
636  if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
637    return false;  // Bad size.
638
639  // Read the used item count.
640  memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
641  if (*used_count > *num_entries)
642    return false;  // Bad used item count;
643
644  // Read the salt.
645  memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
646
647  // This file looks OK from the header's perspective.
648  return true;
649}
650
651bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) {
652  if (!database_name_override_.empty()) {
653    // use this filename, the directory must exist
654    *filename = database_name_override_;
655    return true;
656  }
657
658  if (!profile_ || profile_->GetPath().empty())
659    return false;
660
661  FilePath profile_dir = profile_->GetPath();
662  *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
663  return true;
664}
665
666// Initializes the shared memory structure. The salt should already be filled
667// in so that it can be written to the shared memory
668bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
669  // The table is the size of the table followed by the entries.
670  uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
671
672  // Create the shared memory object.
673  shared_memory_ = new base::SharedMemory();
674  if (!shared_memory_)
675    return false;
676
677  if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
678    delete shared_memory_;
679    shared_memory_ = NULL;
680    return false;
681  }
682
683  if (init_to_empty) {
684    memset(shared_memory_->memory(), 0, alloc_size);
685    used_items_ = 0;
686  }
687  table_length_ = num_entries;
688
689  // Save the header for other processes to read.
690  SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
691  header->length = table_length_;
692  memcpy(header->salt, salt_, LINK_SALT_LENGTH);
693
694  // Our table pointer is just the data immediately following the size.
695  hash_table_ = reinterpret_cast<Fingerprint*>(
696      static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
697
698  return true;
699}
700
701bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
702  base::SharedMemory *old_shared_memory = shared_memory_;
703  Fingerprint* old_hash_table = hash_table_;
704  int32 old_table_length = table_length_;
705  if (!CreateURLTable(num_entries, true)) {
706    // Try to put back the old state.
707    shared_memory_ = old_shared_memory;
708    hash_table_ = old_hash_table;
709    table_length_ = old_table_length;
710    return false;
711  }
712
713#ifndef NDEBUG
714  DebugValidate();
715#endif
716
717  return true;
718}
719
720void VisitedLinkMaster::FreeURLTable() {
721  if (shared_memory_) {
722    delete shared_memory_;
723    shared_memory_ = NULL;
724  }
725  if (!file_)
726    return;
727
728  BrowserThread::PostTask(
729      BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_));
730}
731
732bool VisitedLinkMaster::ResizeTableIfNecessary() {
733  DCHECK(table_length_ > 0) << "Must have a table";
734
735  // Load limits for good performance/space. We are pretty conservative about
736  // keeping the table not very full. This is because we use linear probing
737  // which increases the likelihood of clumps of entries which will reduce
738  // performance.
739  const float max_table_load = 0.5f;  // Grow when we're > this full.
740  const float min_table_load = 0.2f;  // Shrink when we're < this full.
741
742  float load = ComputeTableLoad();
743  if (load < max_table_load &&
744      (table_length_ <= static_cast<float>(kDefaultTableSize) ||
745       load > min_table_load))
746    return false;
747
748  // Table needs to grow or shrink.
749  int new_size = NewTableSizeForCount(used_items_);
750  DCHECK(new_size > used_items_);
751  DCHECK(load <= min_table_load || new_size > table_length_);
752  ResizeTable(new_size);
753  return true;
754}
755
756void VisitedLinkMaster::ResizeTable(int32 new_size) {
757  DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
758  shared_memory_serial_++;
759
760#ifndef NDEBUG
761  DebugValidate();
762#endif
763
764  base::SharedMemory* old_shared_memory = shared_memory_;
765  Fingerprint* old_hash_table = hash_table_;
766  int32 old_table_length = table_length_;
767  if (!BeginReplaceURLTable(new_size))
768    return;
769
770  // Now we have two tables, our local copy which is the old one, and the new
771  // one loaded into this object where we need to copy the data.
772  for (int32 i = 0; i < old_table_length; i++) {
773    Fingerprint cur = old_hash_table[i];
774    if (cur)
775      AddFingerprint(cur, false);
776  }
777
778  // On error unmapping, just forget about it since we can't do anything
779  // else to release it.
780  delete old_shared_memory;
781
782  // Send an update notification to all child processes so they read the new
783  // table.
784  listener_->NewTable(shared_memory_);
785
786#ifndef NDEBUG
787  DebugValidate();
788#endif
789
790  // The new table needs to be written to disk.
791  WriteFullTable();
792}
793
794uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
795  // These table sizes are selected to be the maximum prime number less than
796  // a "convenient" multiple of 1K.
797  static const int table_sizes[] = {
798      16381,    // 16K  = 16384   <- don't shrink below this table size
799                //                   (should be == default_table_size)
800      32767,    // 32K  = 32768
801      65521,    // 64K  = 65536
802      130051,   // 128K = 131072
803      262127,   // 256K = 262144
804      524269,   // 512K = 524288
805      1048549,  // 1M   = 1048576
806      2097143,  // 2M   = 2097152
807      4194301,  // 4M   = 4194304
808      8388571,  // 8M   = 8388608
809      16777199,  // 16M  = 16777216
810      33554347};  // 32M  = 33554432
811
812  // Try to leave the table 33% full.
813  int desired = item_count * 3;
814
815  // Find the closest prime.
816  for (size_t i = 0; i < arraysize(table_sizes); i ++) {
817    if (table_sizes[i] > desired)
818      return table_sizes[i];
819  }
820
821  // Growing very big, just approximate a "good" number, not growing as much
822  // as normal.
823  return item_count * 2 - 1;
824}
825
826// See the TableBuilder definition in the header file for how this works.
827bool VisitedLinkMaster::RebuildTableFromHistory() {
828  DCHECK(!table_builder_);
829  if (table_builder_)
830    return false;
831
832  HistoryService* history_service = history_service_override_;
833  if (!history_service && profile_) {
834    history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
835  }
836
837  if (!history_service) {
838    DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
839                     "obtain a HistoryService.";
840    return false;
841  }
842
843  // TODO(brettw) make sure we have reasonable salt!
844  table_builder_ = new TableBuilder(this, salt_);
845
846  // Make sure the table builder stays live during the call, even if the
847  // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
848  table_builder_->AddRef();
849  history_service->IterateURLs(table_builder_);
850  return true;
851}
852
853// See the TableBuilder declaration above for how this works.
854void VisitedLinkMaster::OnTableRebuildComplete(
855    bool success,
856    const std::vector<Fingerprint>& fingerprints) {
857  if (success) {
858    // Replace the old table with a new blank one.
859    shared_memory_serial_++;
860
861    // We are responsible for freeing it AFTER it has been replaced if
862    // replacement succeeds.
863    base::SharedMemory* old_shared_memory = shared_memory_;
864
865    int new_table_size = NewTableSizeForCount(
866        static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
867    if (BeginReplaceURLTable(new_table_size)) {
868      // Free the old table.
869      delete old_shared_memory;
870
871      // Add the stored fingerprints to the hash table.
872      for (size_t i = 0; i < fingerprints.size(); i++)
873        AddFingerprint(fingerprints[i], false);
874
875      // Also add anything that was added while we were asynchronously
876      // generating the new table.
877      for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
878           i != added_since_rebuild_.end(); ++i)
879        AddFingerprint(*i, false);
880      added_since_rebuild_.clear();
881
882      // Now handle deletions.
883      DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
884      deleted_since_rebuild_.clear();
885
886      // Send an update notification to all child processes.
887      listener_->NewTable(shared_memory_);
888
889      // We shouldn't be writing the table from the main thread!
890      //   http://code.google.com/p/chromium/issues/detail?id=24163
891      base::ThreadRestrictions::ScopedAllowIO allow_io;
892      WriteFullTable();
893    }
894  }
895  table_builder_ = NULL;  // Will release our reference to the builder.
896
897  // Notify the unit test that the rebuild is complete (will be NULL in prod.)
898  if (rebuild_complete_task_.get()) {
899    rebuild_complete_task_->Run();
900    rebuild_complete_task_.reset(NULL);
901  }
902}
903
904void VisitedLinkMaster::WriteToFile(FILE* file,
905                                    off_t offset,
906                                    void* data,
907                                    int32 data_size) {
908#ifndef NDEBUG
909  posted_asynchronous_operation_ = true;
910#endif
911
912  BrowserThread::PostTask(
913      BrowserThread::FILE, FROM_HERE,
914      new AsyncWriter(file, offset, data, data_size));
915}
916
917void VisitedLinkMaster::WriteUsedItemCountToFile() {
918  if (!file_)
919    return;  // See comment on the file_ variable for why this might happen.
920  WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
921}
922
923void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
924  if (!file_)
925    return;  // See comment on the file_ variable for why this might happen.
926  if (last_hash < first_hash) {
927    // Handle wraparound at 0. This first write is first_hash->EOF
928    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
929                &hash_table_[first_hash],
930                (table_length_ - first_hash + 1) * sizeof(Fingerprint));
931
932    // Now do 0->last_lash.
933    WriteToFile(file_, kFileHeaderSize, hash_table_,
934                (last_hash + 1) * sizeof(Fingerprint));
935  } else {
936    // Normal case, just write the range.
937    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
938                &hash_table_[first_hash],
939                (last_hash - first_hash + 1) * sizeof(Fingerprint));
940  }
941}
942
943bool VisitedLinkMaster::ReadFromFile(FILE* file,
944                                     off_t offset,
945                                     void* data,
946                                     size_t data_size) {
947#ifndef NDEBUG
948  // Since this function is synchronous, we require that no asynchronous
949  // operations could possibly be pending.
950  DCHECK(!posted_asynchronous_operation_);
951#endif
952
953  if (fseek(file, offset, SEEK_SET) != 0)
954    return false;
955
956  size_t num_read = fread(data, 1, data_size, file);
957  return num_read == data_size;
958}
959
960// VisitedLinkTableBuilder ----------------------------------------------------
961
962VisitedLinkMaster::TableBuilder::TableBuilder(
963    VisitedLinkMaster* master,
964    const uint8 salt[LINK_SALT_LENGTH])
965    : master_(master),
966      success_(true) {
967  fingerprints_.reserve(4096);
968  memcpy(salt_, salt, sizeof(salt));
969}
970
971// TODO(brettw): Do we want to try to cancel the request if this happens? It
972// could delay shutdown if there are a lot of URLs.
973void VisitedLinkMaster::TableBuilder::DisownMaster() {
974  master_ = NULL;
975}
976
977void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
978  if (!url.is_empty()) {
979    fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
980        url.spec().data(), url.spec().length(), salt_));
981  }
982}
983
984void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
985  success_ = success;
986  DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
987
988  // Marshal to the main thread to notify the VisitedLinkMaster that the
989  // rebuild is complete.
990  BrowserThread::PostTask(
991      BrowserThread::UI, FROM_HERE,
992      NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread));
993}
994
995void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
996  if (master_)
997    master_->OnTableRebuildComplete(success_, fingerprints_);
998
999  // WILL (generally) DELETE THIS! This balances the AddRef in
1000  // VisitedLinkMaster::RebuildTableFromHistory.
1001  Release();
1002}
1003