// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/renderer/spellchecker/hunspell_engine.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <algorithm>
82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <iterator>
92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/files/memory_mapped_file.h"
11eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_common.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_messages.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/renderer/render_thread.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "third_party/hunspell/src/hunspell/hunspell.hxx"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::RenderThread;
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace {
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Maximum length of words we actually check.
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // 64 is the observed limits for OSX system checker.
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const size_t kMaxCheckedLen = 64;
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Maximum length of words we provide suggestions for.
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // 24 is the observed limits for OSX system checker.
262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const size_t kMaxSuggestLen = 24;
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  COMPILE_ASSERT(kMaxCheckedLen <= size_t(MAXWORDLEN), MaxCheckedLen_too_long);
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  COMPILE_ASSERT(kMaxSuggestLen <= kMaxCheckedLen, MaxSuggestLen_too_long);
30effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch}  // namespace
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(OS_MACOSX)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SpellingEngine* CreateNativeSpellingEngine() {
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return new HunspellEngine();
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::HunspellEngine()
39effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    : hunspell_enabled_(false),
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      initialized_(false),
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dictionary_requested_(false) {
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Wait till we check the first word before doing any initializing.
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::~HunspellEngine() {
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid HunspellEngine::Init(base::File file) {
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  initialized_ = true;
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hunspell_.reset();
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bdict_file_.reset();
52effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  file_ = file.Pass();
53effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  hunspell_enabled_ = file_.IsValid();
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Delay the actual initialization of hunspell until it is needed.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::InitializeHunspell() {
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (hunspell_.get())
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bdict_file_.reset(new base::MemoryMappedFile);
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (bdict_file_->Initialize(file_.Pass())) {
64effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    hunspell_.reset(new Hunspell(bdict_file_->data(), bdict_file_->length()));
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NOTREACHED() << "Could not mmap spellchecker dictionary.";
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
70a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool HunspellEngine::CheckSpelling(const base::string16& word_to_check,
71a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)                                   int tag) {
722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Assume all words that cannot be checked are valid. Since Chrome can't
732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // offer suggestions on them, either, there's no point in flagging them to
742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // the user.
752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool word_correct = true;
765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  std::string word_to_check_utf8(base::UTF16ToUTF8(word_to_check));
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Limit the size of checked words.
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (word_to_check_utf8.length() <= kMaxCheckedLen) {
802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // If |hunspell_| is NULL here, an error has occurred, but it's better
812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // to check rather than crash.
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (hunspell_.get()) {
832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // |hunspell_->spell| returns 0 if the word is misspelled.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return word_correct;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::FillSuggestionList(
92a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    const base::string16& wrong_word,
93a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)    std::vector<base::string16>* optional_suggestions) {
945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  std::string wrong_word_utf8(base::UTF16ToUTF8(wrong_word));
952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (wrong_word_utf8.length() > kMaxSuggestLen)
962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return;
972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If |hunspell_| is NULL here, an error has occurred, but it's better
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to check rather than crash.
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(groby): Technically, it's not. We should track down the issue.
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!hunspell_.get())
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  char** suggestions = NULL;
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int number_of_suggestions =
1062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      hunspell_->suggest(&suggestions, wrong_word_utf8.c_str());
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Populate the vector of WideStrings.
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < number_of_suggestions; ++i) {
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (i < chrome::spellcheck_common::kMaxSuggestions)
1115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      optional_suggestions->push_back(base::UTF8ToUTF16(suggestions[i]));
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    free(suggestions[i]);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (suggestions != NULL)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    free(suggestions);
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::InitializeIfNeeded() {
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!initialized_ && !dictionary_requested_) {
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // RenderThread will not exist in test.
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (RenderThread::Get())
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      RenderThread::Get()->Send(new SpellCheckHostMsg_RequestDictionary);
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dictionary_requested_ = true;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Don't initialize if hunspell is disabled.
128effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if (file_.IsValid())
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    InitializeHunspell();
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return !initialized_;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::IsEnabled() {
135effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  return hunspell_enabled_;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
137