15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/renderer/spellchecker/hunspell_engine.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <algorithm> 82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <iterator> 92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/files/memory_mapped_file.h" 11eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_common.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_messages.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/renderer/render_thread.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "third_party/hunspell/src/hunspell/hunspell.hxx" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::RenderThread; 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace { 202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Maximum length of words we actually check. 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 64 is the observed limits for OSX system checker. 222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const size_t kMaxCheckedLen = 64; 232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Maximum length of words we provide suggestions for. 252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 24 is the observed limits for OSX system checker. 262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const size_t kMaxSuggestLen = 24; 272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COMPILE_ASSERT(kMaxCheckedLen <= size_t(MAXWORDLEN), MaxCheckedLen_too_long); 292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COMPILE_ASSERT(kMaxSuggestLen <= kMaxCheckedLen, MaxSuggestLen_too_long); 30effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch} // namespace 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(OS_MACOSX) 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SpellingEngine* CreateNativeSpellingEngine() { 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return new HunspellEngine(); 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::HunspellEngine() 39effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch : hunspell_enabled_(false), 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) initialized_(false), 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dictionary_requested_(false) { 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Wait till we check the first word before doing any initializing. 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::~HunspellEngine() { 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid HunspellEngine::Init(base::File file) { 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) initialized_ = true; 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) hunspell_.reset(); 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bdict_file_.reset(); 52effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch file_ = file.Pass(); 53effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch hunspell_enabled_ = file_.IsValid(); 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Delay the actual initialization of hunspell until it is needed. 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::InitializeHunspell() { 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (hunspell_.get()) 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bdict_file_.reset(new base::MemoryMappedFile); 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (bdict_file_->Initialize(file_.Pass())) { 64effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch hunspell_.reset(new Hunspell(bdict_file_->data(), bdict_file_->length())); 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NOTREACHED() << "Could not mmap spellchecker dictionary."; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 70a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool HunspellEngine::CheckSpelling(const base::string16& word_to_check, 71a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) int tag) { 722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Assume all words that cannot be checked are valid. Since Chrome can't 732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // offer suggestions on them, either, there's no point in flagging them to 742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // the user. 752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool word_correct = true; 765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string word_to_check_utf8(base::UTF16ToUTF8(word_to_check)); 772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Limit the size of checked words. 792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (word_to_check_utf8.length() <= kMaxCheckedLen) { 802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // If |hunspell_| is NULL here, an error has occurred, but it's better 812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // to check rather than crash. 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (hunspell_.get()) { 832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // |hunspell_->spell| returns 0 if the word is misspelled. 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return word_correct; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::FillSuggestionList( 92a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16& wrong_word, 93a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) std::vector<base::string16>* optional_suggestions) { 945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string wrong_word_utf8(base::UTF16ToUTF8(wrong_word)); 952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (wrong_word_utf8.length() > kMaxSuggestLen) 962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return; 972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If |hunspell_| is NULL here, an error has occurred, but it's better 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to check rather than crash. 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(groby): Technically, it's not. We should track down the issue. 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!hunspell_.get()) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) char** suggestions = NULL; 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int number_of_suggestions = 1062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) hunspell_->suggest(&suggestions, wrong_word_utf8.c_str()); 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Populate the vector of WideStrings. 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < number_of_suggestions; ++i) { 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i < chrome::spellcheck_common::kMaxSuggestions) 1115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) optional_suggestions->push_back(base::UTF8ToUTF16(suggestions[i])); 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free(suggestions[i]); 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (suggestions != NULL) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free(suggestions); 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::InitializeIfNeeded() { 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!initialized_ && !dictionary_requested_) { 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // RenderThread will not exist in test. 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RenderThread::Get()) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RenderThread::Get()->Send(new SpellCheckHostMsg_RequestDictionary); 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dictionary_requested_ = true; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Don't initialize if hunspell is disabled. 128effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (file_.IsValid()) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InitializeHunspell(); 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return !initialized_; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::IsEnabled() { 135effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return hunspell_enabled_; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 137