// hunspell_engine.cc revision effb81e5f8246d0db0270817048dc992db66e9fb
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/renderer/spellchecker/hunspell_engine.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <algorithm> 82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <iterator> 92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/files/memory_mapped_file.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/metrics/histogram.h" 12eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_common.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/spellcheck_messages.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/renderer/render_thread.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "third_party/hunspell/src/hunspell/hunspell.hxx" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using base::TimeTicks; 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using content::RenderThread; 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace { 222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne 
(Richard Coles) // Maximum length of words we actually check. 232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 64 is the observed limits for OSX system checker. 242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const size_t kMaxCheckedLen = 64; 252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Maximum length of words we provide suggestions for. 272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 24 is the observed limits for OSX system checker. 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const size_t kMaxSuggestLen = 24; 292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COMPILE_ASSERT(kMaxCheckedLen <= size_t(MAXWORDLEN), MaxCheckedLen_too_long); 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COMPILE_ASSERT(kMaxSuggestLen <= kMaxCheckedLen, MaxSuggestLen_too_long); 32effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch} // namespace 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(OS_MACOSX) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SpellingEngine* CreateNativeSpellingEngine() { 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return new HunspellEngine(); 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::HunspellEngine() 41effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch : hunspell_enabled_(false), 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) initialized_(false), 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dictionary_requested_(false) { 
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Wait till we check the first word before doing any initializing. 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HunspellEngine::~HunspellEngine() { 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 50effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochvoid HunspellEngine::Init(base::File file) { 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) initialized_ = true; 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) hunspell_.reset(); 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bdict_file_.reset(); 54effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch file_ = file.Pass(); 55effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch hunspell_enabled_ = file_.IsValid(); 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Delay the actual initialization of hunspell until it is needed. 
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::InitializeHunspell() { 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (hunspell_.get()) 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bdict_file_.reset(new base::MemoryMappedFile); 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 65effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (bdict_file_->Initialize(file_.Pass())) { 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TimeTicks debug_start_time = base::Histogram::DebugNow(); 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch hunspell_.reset(new Hunspell(bdict_file_->data(), bdict_file_->length())); 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DHISTOGRAM_TIMES("Spellcheck.InitTime", 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::Histogram::DebugNow() - debug_start_time); 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NOTREACHED() << "Could not mmap spellchecker dictionary."; 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 77a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool HunspellEngine::CheckSpelling(const base::string16& word_to_check, 78a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) int tag) { 792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 
Assume all words that cannot be checked are valid. Since Chrome can't 802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // offer suggestions on them, either, there's no point in flagging them to 812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // the user. 822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool word_correct = true; 835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string word_to_check_utf8(base::UTF16ToUTF8(word_to_check)); 842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Limit the size of checked words. 862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (word_to_check_utf8.length() <= kMaxCheckedLen) { 872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // If |hunspell_| is NULL here, an error has occurred, but it's better 882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // to check rather than crash. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (hunspell_.get()) { 902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // |hunspell_->spell| returns 0 if the word is misspelled. 
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_correct = (hunspell_->spell(word_to_check_utf8.c_str()) != 0); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return word_correct; 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HunspellEngine::FillSuggestionList( 99a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16& wrong_word, 100a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) std::vector<base::string16>* optional_suggestions) { 1015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string wrong_word_utf8(base::UTF16ToUTF8(wrong_word)); 1022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (wrong_word_utf8.length() > kMaxSuggestLen) 1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return; 1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If |hunspell_| is NULL here, an error has occurred, but it's better 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to check rather than crash. 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(groby): Technically, it's not. We should track down the issue. 
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!hunspell_.get()) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) char** suggestions = NULL; 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int number_of_suggestions = 1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) hunspell_->suggest(&suggestions, wrong_word_utf8.c_str()); 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Populate the vector of WideStrings. 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < number_of_suggestions; ++i) { 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i < chrome::spellcheck_common::kMaxSuggestions) 1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) optional_suggestions->push_back(base::UTF8ToUTF16(suggestions[i])); 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free(suggestions[i]); 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (suggestions != NULL) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free(suggestions); 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::InitializeIfNeeded() { 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!initialized_ && !dictionary_requested_) { 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // RenderThread will not exist in test. 
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RenderThread::Get()) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RenderThread::Get()->Send(new SpellCheckHostMsg_RequestDictionary); 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dictionary_requested_ = true; 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Don't initialize if hunspell is disabled. 135effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (file_.IsValid()) 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InitializeHunspell(); 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return !initialized_; 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HunspellEngine::IsEnabled() { 142effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return hunspell_enabled_; 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 144