1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
6
7#include <functional>
8
9#include "base/file_util.h"
10#include "base/files/important_file_writer.h"
11#include "base/md5.h"
12#include "base/strings/string_number_conversions.h"
13#include "base/strings/string_split.h"
14#include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
15#include "chrome/common/chrome_constants.h"
16#include "chrome/common/spellcheck_messages.h"
17#include "content/public/browser/browser_thread.h"
18#include "sync/api/sync_change.h"
19#include "sync/api/sync_data.h"
20#include "sync/api/sync_error_factory.h"
21#include "sync/protocol/sync.pb.h"
22
23using content::BrowserThread;
24using chrome::spellcheck_common::WordList;
25using chrome::spellcheck_common::WordSet;
26
27namespace {
28
// Filename extension appended to the dictionary path (via AddExtension) to
// name the backup copy of the dictionary file.
const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");

// Marker that introduces the checksum stored in the dictionary file. The text
// before the last occurrence of this marker is what the MD5 checksum covers.
const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
34
// Outcome of verifying the checksum stored in a custom spellcheck dictionary
// file.
enum ChecksumStatus {
  VALID_CHECKSUM = 0,
  INVALID_CHECKSUM = 1,
};
40
// Flags describing what a dictionary change sanitation found. The non-zero
// values are distinct bits, so several of them can be OR-ed into one int.
enum ChangeSanitationResult {
  // Nothing had to be dropped; the change can be applied as-is.
  VALID_CHANGE = 0,

  // Some of the words to be added were not valid.
  DETECTED_INVALID_WORDS = 1 << 0,

  // Some of the words to be added were already in the dictionary.
  DETECTED_DUPLICATE_WORDS = 1 << 1,

  // Some of the words to be removed were not in the dictionary.
  DETECTED_MISSING_WORDS = 1 << 2,
};
55
56// Loads the file at |file_path| into the |words| container. If the file has a
57// valid checksum, then returns ChecksumStatus::VALID. If the file has an
58// invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
59ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) {
60  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
61  words.clear();
62  std::string contents;
63  file_util::ReadFileToString(file_path, &contents);
64  size_t pos = contents.rfind(CHECKSUM_PREFIX);
65  if (pos != std::string::npos) {
66    std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
67    contents = contents.substr(0, pos);
68    if (checksum != base::MD5String(contents))
69      return INVALID_CHECKSUM;
70  }
71  TrimWhitespaceASCII(contents, TRIM_ALL, &contents);
72  base::SplitString(contents, '\n', &words);
73  return VALID_CHECKSUM;
74}
75
76// Returns true for invalid words and false for valid words.
77bool IsInvalidWord(const std::string& word) {
78  std::string tmp;
79  return !IsStringUTF8(word) ||
80      word.length() >
81          chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES ||
82      word.empty() ||
83      TRIM_NONE != TrimWhitespaceASCII(word, TRIM_ALL, &tmp);
84}
85
86// Loads the custom spellcheck dictionary from |path| into |custom_words|. If
87// the dictionary checksum is not valid, but backup checksum is valid, then
88// restores the backup and loads that into |custom_words| instead. If the backup
89// is invalid too, then clears |custom_words|. Must be called on the file
90// thread.
91void LoadDictionaryFileReliably(WordList& custom_words,
92                                const base::FilePath& path) {
93  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
94  // Load the contents and verify the checksum.
95  if (LoadFile(path, custom_words) == VALID_CHECKSUM)
96    return;
97  // Checksum is not valid. See if there's a backup.
98  base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
99  if (!base::PathExists(backup))
100    return;
101  // Load the backup and verify its checksum.
102  if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
103    return;
104  // Backup checksum is valid. Restore the backup.
105  base::CopyFile(backup, path);
106}
107
108// Backs up the original dictionary, saves |custom_words| and its checksum into
109// the custom spellcheck dictionary at |path|.
110void SaveDictionaryFileReliably(
111    const WordList& custom_words,
112    const base::FilePath& path) {
113  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
114  std::stringstream content;
115  for (WordList::const_iterator it = custom_words.begin();
116       it != custom_words.end();
117       ++it) {
118    content << *it << '\n';
119  }
120  std::string checksum = base::MD5String(content.str());
121  content << CHECKSUM_PREFIX << checksum;
122  base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
123  base::ImportantFileWriter::WriteFileAtomically(path, content.str());
124}
125
126// Removes duplicate and invalid words from |to_add| word list and sorts it.
127// Looks for duplicates in both |to_add| and |existing| word lists. Returns a
128// bitmap of |ChangeSanitationResult| values.
129int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) {
130  // Do not add duplicate words.
131  std::sort(to_add.begin(), to_add.end());
132  WordList new_words;
133  std::set_difference(to_add.begin(),
134                      to_add.end(),
135                      existing.begin(),
136                      existing.end(),
137                      std::back_inserter(new_words));
138  new_words.erase(std::unique(new_words.begin(), new_words.end()),
139                  new_words.end());
140  int result = VALID_CHANGE;
141  if (to_add.size() != new_words.size())
142    result |= DETECTED_DUPLICATE_WORDS;
143  // Do not add invalid words.
144  size_t size = new_words.size();
145  new_words.erase(std::remove_if(new_words.begin(),
146                                 new_words.end(),
147                                 IsInvalidWord),
148                  new_words.end());
149  if (size != new_words.size())
150    result |= DETECTED_INVALID_WORDS;
151  // Save the sanitized words to be added.
152  std::swap(to_add, new_words);
153  return result;
154}
155
156// Removes word from |to_remove| that are missing from |existing| word list and
157// sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
158int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) {
159  // Do not remove words that are missing from the dictionary.
160  std::sort(to_remove.begin(), to_remove.end());
161  WordList found_words;
162  std::set_intersection(existing.begin(),
163                        existing.end(),
164                        to_remove.begin(),
165                        to_remove.end(),
166                        std::back_inserter(found_words));
167  int result = VALID_CHANGE;
168  if (to_remove.size() > found_words.size())
169    result |= DETECTED_MISSING_WORDS;
170  // Save the sanitized words to be removed.
171  std::swap(to_remove, found_words);
172  return result;
173}
174
175}  // namespace
176
177
// Creates a change with default-constructed lists of words to add and remove.
SpellcheckCustomDictionary::Change::Change() {
}
180
181SpellcheckCustomDictionary::Change::Change(
182    const SpellcheckCustomDictionary::Change& other)
183    : to_add_(other.to_add()),
184      to_remove_(other.to_remove()) {
185}
186
// Creates a change whose words to add are a copy of |to_add|; the list of
// words to remove is default-constructed.
SpellcheckCustomDictionary::Change::Change(const WordList& to_add)
    : to_add_(to_add) {
}
190
// Nothing to release explicitly.
SpellcheckCustomDictionary::Change::~Change() {
}
193
// Appends |word| to the list of words to add. No validation or deduplication
// happens here; see Sanitize().
void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
  to_add_.push_back(word);
}
197
// Appends |word| to the list of words to remove. No check against the
// dictionary happens here; see Sanitize().
void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
  to_remove_.push_back(word);
}
201
202int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) {
203  int result = VALID_CHANGE;
204  if (!to_add_.empty())
205    result |= SanitizeWordsToAdd(words, to_add_);
206  if (!to_remove_.empty())
207    result |= SanitizeWordsToRemove(words, to_remove_);
208  return result;
209}
210
// Returns the words this change adds.
const WordList& SpellcheckCustomDictionary::Change::to_add() const {
  return to_add_;
}
214
// Returns the words this change removes.
const WordList& SpellcheckCustomDictionary::Change::to_remove() const {
  return to_remove_;
}
218
219bool SpellcheckCustomDictionary::Change::empty() const {
220  return to_add_.empty() && to_remove_.empty();
221}
222
223SpellcheckCustomDictionary::SpellcheckCustomDictionary(
224    const base::FilePath& path)
225    : custom_dictionary_path_(),
226      weak_ptr_factory_(this),
227      is_loaded_(false) {
228  custom_dictionary_path_ =
229      path.Append(chrome::kCustomDictionaryFileName);
230}
231
// Nothing to release explicitly.
SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
}
234
// Returns the in-memory set of custom words. Must be called on the UI thread.
const WordSet& SpellcheckCustomDictionary::GetWords() const {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  return words_;
}
239
240bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
241  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
242  Change dictionary_change;
243  dictionary_change.AddWord(word);
244  int result = dictionary_change.Sanitize(GetWords());
245  Apply(dictionary_change);
246  Notify(dictionary_change);
247  Sync(dictionary_change);
248  Save(dictionary_change);
249  return result == VALID_CHANGE;
250}
251
252bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
253  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
254  Change dictionary_change;
255  dictionary_change.RemoveWord(word);
256  int result = dictionary_change.Sanitize(GetWords());
257  Apply(dictionary_change);
258  Notify(dictionary_change);
259  Sync(dictionary_change);
260  Save(dictionary_change);
261  return result == VALID_CHANGE;
262}
263
264bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
265  return !!words_.count(word);
266}
267
// Registers |observer| in the observer list used for the load and change
// notifications. Must be called on the UI thread.
void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  observers_.AddObserver(observer);
}
272
// Removes |observer| from the observer list. Must be called on the UI thread.
void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  observers_.RemoveObserver(observer);
}
277
// Returns true once OnLoaded() has run. Must be called on the UI thread.
bool SpellcheckCustomDictionary::IsLoaded() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  return is_loaded_;
}
282
283bool SpellcheckCustomDictionary::IsSyncing() {
284  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
285  return !!sync_processor_.get();
286}
287
// Starts loading the dictionary: LoadDictionaryFile() is posted to the FILE
// thread with the dictionary path, and its result is passed to OnLoaded().
// Must be called on the UI thread.
void SpellcheckCustomDictionary::Load() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  BrowserThread::PostTaskAndReplyWithResult(
      BrowserThread::FILE,
      FROM_HERE,
      base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
                 custom_dictionary_path_),
      // The reply is bound to a weak pointer rather than to |this|;
      // presumably so it is dropped if this object is destroyed first.
      base::Bind(&SpellcheckCustomDictionary::OnLoaded,
                 weak_ptr_factory_.GetWeakPtr()));
}
298
299syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
300    syncer::ModelType type,
301    const syncer::SyncDataList& initial_sync_data,
302    scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
303    scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
304  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
305  DCHECK(!sync_processor_.get());
306  DCHECK(!sync_error_handler_.get());
307  DCHECK(sync_processor.get());
308  DCHECK(sync_error_handler.get());
309  DCHECK_EQ(syncer::DICTIONARY, type);
310  sync_processor_ = sync_processor.Pass();
311  sync_error_handler_ = sync_error_handler.Pass();
312
313  // Build a list of words to add locally.
314  WordList to_add_locally;
315  for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin();
316       it != initial_sync_data.end();
317       ++it) {
318    DCHECK_EQ(syncer::DICTIONARY, it->GetDataType());
319    to_add_locally.push_back(it->GetSpecifics().dictionary().word());
320  }
321
322  // Add remote words locally.
323  Change to_change_locally(to_add_locally);
324  to_change_locally.Sanitize(GetWords());
325  Apply(to_change_locally);
326  Notify(to_change_locally);
327  Save(to_change_locally);
328
329  // Add as many as possible local words remotely.
330  std::sort(to_add_locally.begin(), to_add_locally.end());
331  WordList to_add_remotely;
332  std::set_difference(words_.begin(),
333                      words_.end(),
334                      to_add_locally.begin(),
335                      to_add_locally.end(),
336                      std::back_inserter(to_add_remotely));
337
338  // Send local changes to the sync server.
339  Change to_change_remotely(to_add_remotely);
340  syncer::SyncMergeResult result(type);
341  result.set_error(Sync(to_change_remotely));
342  return result;
343}
344
// Stops syncing by releasing the sync change processor and the sync error
// handler; IsSyncing() returns false afterwards. |type| must be
// syncer::DICTIONARY. Must be called on the UI thread.
void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  DCHECK_EQ(syncer::DICTIONARY, type);
  sync_processor_.reset();
  sync_error_handler_.reset();
}
351
352syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
353    syncer::ModelType type) const {
354  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
355  DCHECK_EQ(syncer::DICTIONARY, type);
356  syncer::SyncDataList data;
357  std::string word;
358  size_t i = 0;
359  for (WordSet::const_iterator it = words_.begin();
360       it != words_.end() &&
361           i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
362       ++it, ++i) {
363    word = *it;
364    sync_pb::EntitySpecifics specifics;
365    specifics.mutable_dictionary()->set_word(word);
366    data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
367  }
368  return data;
369}
370
371syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
372    const tracked_objects::Location& from_here,
373    const syncer::SyncChangeList& change_list) {
374  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
375  Change dictionary_change;
376  for (syncer::SyncChangeList::const_iterator it = change_list.begin();
377       it != change_list.end();
378       ++it) {
379    DCHECK(it->IsValid());
380    std::string word = it->sync_data().GetSpecifics().dictionary().word();
381    switch (it->change_type()) {
382      case syncer::SyncChange::ACTION_ADD:
383        dictionary_change.AddWord(word);
384        break;
385      case syncer::SyncChange::ACTION_DELETE:
386        dictionary_change.RemoveWord(word);
387        break;
388      default:
389        return sync_error_handler_->CreateAndUploadError(
390            FROM_HERE,
391            "Processing sync changes failed on change type " +
392                syncer::SyncChange::ChangeTypeToString(it->change_type()));
393    }
394  }
395
396  dictionary_change.Sanitize(GetWords());
397  Apply(dictionary_change);
398  Notify(dictionary_change);
399  Save(dictionary_change);
400
401  return syncer::SyncError();
402}
403
404// static
405WordList SpellcheckCustomDictionary::LoadDictionaryFile(
406    const base::FilePath& path) {
407  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
408  WordList words;
409  LoadDictionaryFileReliably(words, path);
410  if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words))
411    SaveDictionaryFileReliably(words, path);
412  SpellCheckHostMetrics::RecordCustomWordCountStats(words.size());
413  return words;
414}
415
416// static
417void SpellcheckCustomDictionary::UpdateDictionaryFile(
418    const SpellcheckCustomDictionary::Change& dictionary_change,
419    const base::FilePath& path) {
420  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
421  if (dictionary_change.empty())
422    return;
423
424  WordList custom_words;
425  LoadDictionaryFileReliably(custom_words, path);
426
427  // Add words.
428  custom_words.insert(custom_words.end(),
429                      dictionary_change.to_add().begin(),
430                      dictionary_change.to_add().end());
431
432  // Remove words.
433  std::sort(custom_words.begin(), custom_words.end());
434  WordList remaining;
435  std::set_difference(custom_words.begin(),
436                      custom_words.end(),
437                      dictionary_change.to_remove().begin(),
438                      dictionary_change.to_remove().end(),
439                      std::back_inserter(remaining));
440  std::swap(custom_words, remaining);
441
442  SaveDictionaryFileReliably(custom_words, path);
443}
444
445void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) {
446  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
447  Change dictionary_change(custom_words);
448  dictionary_change.Sanitize(GetWords());
449  Apply(dictionary_change);
450  Sync(dictionary_change);
451  is_loaded_ = true;
452  FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
453}
454
455void SpellcheckCustomDictionary::Apply(
456    const SpellcheckCustomDictionary::Change& dictionary_change) {
457  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
458  if (!dictionary_change.to_add().empty()) {
459    words_.insert(dictionary_change.to_add().begin(),
460                  dictionary_change.to_add().end());
461  }
462  if (!dictionary_change.to_remove().empty()) {
463    WordSet updated_words;
464    std::set_difference(words_.begin(),
465                        words_.end(),
466                        dictionary_change.to_remove().begin(),
467                        dictionary_change.to_remove().end(),
468                        std::inserter(updated_words, updated_words.end()));
469    std::swap(words_, updated_words);
470  }
471}
472
// Schedules |dictionary_change| to be written to disk: posts
// UpdateDictionaryFile() to the FILE thread, bound to the change and the
// dictionary path. Must be called on the UI thread.
void SpellcheckCustomDictionary::Save(
    const SpellcheckCustomDictionary::Change& dictionary_change) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  BrowserThread::PostTask(
      BrowserThread::FILE,
      FROM_HERE,
      base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
                 dictionary_change,
                 custom_dictionary_path_));
}
483
484syncer::SyncError SpellcheckCustomDictionary::Sync(
485    const SpellcheckCustomDictionary::Change& dictionary_change) {
486  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
487  syncer::SyncError error;
488  if (!IsSyncing() || dictionary_change.empty())
489    return error;
490
491  // The number of words on the sync server should not exceed the limits.
492  int server_size = static_cast<int>(words_.size()) -
493      static_cast<int>(dictionary_change.to_add().size());
494  int max_upload_size = std::max(
495      0,
496      static_cast<int>(
497          chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
498          server_size);
499  int upload_size = std::min(
500      static_cast<int>(dictionary_change.to_add().size()),
501      max_upload_size);
502
503  syncer::SyncChangeList sync_change_list;
504  int i = 0;
505
506  for (WordList::const_iterator it = dictionary_change.to_add().begin();
507       it != dictionary_change.to_add().end() && i < upload_size;
508       ++it, ++i) {
509    std::string word = *it;
510    sync_pb::EntitySpecifics specifics;
511    specifics.mutable_dictionary()->set_word(word);
512    sync_change_list.push_back(syncer::SyncChange(
513        FROM_HERE,
514        syncer::SyncChange::ACTION_ADD,
515        syncer::SyncData::CreateLocalData(word, word, specifics)));
516  }
517
518  for (WordList::const_iterator it = dictionary_change.to_remove().begin();
519       it != dictionary_change.to_remove().end();
520       ++it) {
521    std::string word = *it;
522    sync_pb::EntitySpecifics specifics;
523    specifics.mutable_dictionary()->set_word(word);
524    sync_change_list.push_back(syncer::SyncChange(
525        FROM_HERE,
526        syncer::SyncChange::ACTION_DELETE,
527        syncer::SyncData::CreateLocalData(word, word, specifics)));
528  }
529
530  // Send the changes to the sync processor.
531  error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
532  if (error.IsSet())
533    return error;
534
535  // Turn off syncing of this dictionary if the server already has the maximum
536  // number of words.
537  if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
538    StopSyncing(syncer::DICTIONARY);
539
540  return error;
541}
542
543void SpellcheckCustomDictionary::Notify(
544    const SpellcheckCustomDictionary::Change& dictionary_change) {
545  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
546  if (!IsLoaded() || dictionary_change.empty())
547    return;
548  FOR_EACH_OBSERVER(Observer,
549                    observers_,
550                    OnCustomDictionaryChanged(dictionary_change));
551}
552