hunspell_engine.cc revision effb81e5f8246d0db0270817048dc992db66e9fb
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/renderer/spellchecker/hunspell_engine.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <algorithm>
82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <iterator>
92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/files/memory_mapped_file.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/metrics/histogram.h"
12eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_common.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_messages.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/renderer/render_thread.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "third_party/hunspell/src/hunspell/hunspell.hxx"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using base::TimeTicks;
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::RenderThread;
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace {
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Maximum length of words we actually check.
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // 64 is the observed limits for OSX system checker.
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const size_t kMaxCheckedLen = 64;
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Maximum length of words we provide suggestions for.
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // 24 is the observed limits for OSX system checker.
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const size_t kMaxSuggestLen = 24;
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  COMPILE_ASSERT(kMaxCheckedLen <= size_t(MAXWORDLEN), MaxCheckedLen_too_long);
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  COMPILE_ASSERT(kMaxSuggestLen <= kMaxCheckedLen, MaxSuggestLen_too_long);
32effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch}  // namespace
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(OS_MACOSX)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SpellingEngine* CreateNativeSpellingEngine() {
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return new HunspellEngine();
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::HunspellEngine()
41effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    : hunspell_enabled_(false),
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      initialized_(false),
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dictionary_requested_(false) {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Wait till we check the first word before doing any initializing.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::~HunspellEngine() {
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
50effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid HunspellEngine::Init(base::File file) {
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  initialized_ = true;
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hunspell_.reset();
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bdict_file_.reset();
54effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  file_ = file.Pass();
55effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  hunspell_enabled_ = file_.IsValid();
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Delay the actual initialization of hunspell until it is needed.
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::InitializeHunspell() {
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (hunspell_.get())
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bdict_file_.reset(new base::MemoryMappedFile);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
65effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (bdict_file_->Initialize(file_.Pass())) {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    TimeTicks debug_start_time = base::Histogram::DebugNow();
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    hunspell_.reset(new Hunspell(bdict_file_->data(), bdict_file_->length()));
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DHISTOGRAM_TIMES("Spellcheck.InitTime",
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     base::Histogram::DebugNow() - debug_start_time);
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NOTREACHED() << "Could not mmap spellchecker dictionary.";
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
77a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool HunspellEngine::CheckSpelling(const base::string16& word_to_check,
78a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)                                   int tag) {
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Assume all words that cannot be checked are valid. Since Chrome can't
802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // offer suggestions on them, either, there's no point in flagging them to
812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // the user.
822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool word_correct = true;
835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  std::string word_to_check_utf8(base::UTF16ToUTF8(word_to_check));
842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Limit the size of checked words.
862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (word_to_check_utf8.length() <= kMaxCheckedLen) {
872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // If |hunspell_| is NULL here, an error has occurred, but it's better
882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // to check rather than crash.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (hunspell_.get()) {
902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // |hunspell_->spell| returns 0 if the word is misspelled.
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0);
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return word_correct;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::FillSuggestionList(
99a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    const base::string16& wrong_word,
100a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    std::vector<base::string16>* optional_suggestions) {
1015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  std::string wrong_word_utf8(base::UTF16ToUTF8(wrong_word));
1022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (wrong_word_utf8.length() > kMaxSuggestLen)
1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return;
1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If |hunspell_| is NULL here, an error has occurred, but it's better
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to check rather than crash.
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(groby): Technically, it's not. We should track down the issue.
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!hunspell_.get())
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  char** suggestions = NULL;
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int number_of_suggestions =
1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      hunspell_->suggest(&suggestions, wrong_word_utf8.c_str());
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Populate the vector of WideStrings.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < number_of_suggestions; ++i) {
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (i < chrome::spellcheck_common::kMaxSuggestions)
1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      optional_suggestions->push_back(base::UTF8ToUTF16(suggestions[i]));
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    free(suggestions[i]);
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (suggestions != NULL)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    free(suggestions);
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::InitializeIfNeeded() {
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!initialized_ && !dictionary_requested_) {
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // RenderThread will not exist in test.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (RenderThread::Get())
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      RenderThread::Get()->Send(new SpellCheckHostMsg_RequestDictionary);
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dictionary_requested_ = true;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Don't initialize if hunspell is disabled.
135effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (file_.IsValid())
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    InitializeHunspell();
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return !initialized_;
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::IsEnabled() {
142effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  return hunspell_enabled_;
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
144