// spellchecker_platform_engine_unittest.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/string_util.h" 6#include "chrome/browser/spellchecker_platform_engine.h" 7#include "testing/gtest/include/gtest/gtest.h" 8 9// Tests that words are properly ignored. Currently only enabled on OS X as it 10// is the only platform to support ignoring words. Note that in this test, we 11// supply a non-zero doc_tag, in order to test that ignored words are matched to 12// the correct document. 13TEST(PlatformSpellCheckTest, IgnoreWords_EN_US) { 14 const char* kTestCases[] = { 15 "teh", 16 "morblier", 17 "watre", 18 "noooen", 19 }; 20 21 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { 22 const string16 word(ASCIIToUTF16(kTestCases[i])); 23 const int doc_tag = SpellCheckerPlatform::GetDocumentTag(); 24 25 // The word should show up as misspelled. 26 EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word; 27 28 // Ignore the word. 29 SpellCheckerPlatform::IgnoreWord(word); 30 31 // The word should now show up as correctly spelled. 32 EXPECT_TRUE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word; 33 34 // Close the docuemnt. Any words that we had previously ignored should no 35 // longer be ignored and thus should show up as misspelled. 36 SpellCheckerPlatform::CloseDocumentWithTag(doc_tag); 37 38 // The word should now show be spelled wrong again 39 EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word; 40 } 41} // Test IgnoreWords_EN_US 42 43TEST(PlatformSpellCheckTest, SpellCheckSuggestions_EN_US) { 44 static const struct { 45 const char* input; // A string to be tested. 46 const char* suggested_word; // A suggested word that should occur. 
47 } kTestCases[] = { 48 // We need to have separate test cases here, since hunspell and the OS X 49 // spellchecking service occasionally differ on what they consider a valid 50 // suggestion for a given word, although these lists could likely be 51 // integrated somewhat. The test cases for non-Mac are in 52 // chrome/renderer/spellcheck_unittest.cc 53 // These words come from the wikipedia page of the most commonly 54 // misspelled words in english. 55 // (http://en.wikipedia.org/wiki/Commonly_misspelled_words). 56 // However, 10.6 loads multiple dictionaries and enables many non-English 57 // dictionaries by default. As a result, we have removed from the list any 58 // word that is marked as correct because it is correct in another 59 // language. 60 {"absense", "absence"}, 61 {"acceptible", "acceptable"}, 62 {"accidentaly", "accidentally"}, 63 {"acheive", "achieve"}, 64 {"acknowlege", "acknowledge"}, 65 {"acquaintence", "acquaintance"}, 66 {"aquire", "acquire"}, 67 {"aquit", "acquit"}, 68 {"acrage", "acreage"}, 69 {"adultary", "adultery"}, 70 {"advertize", "advertise"}, 71 {"adviseable", "advisable"}, 72 {"alchohol", "alcohol"}, 73 {"alege", "allege"}, 74 {"allegaince", "allegiance"}, 75 {"allmost", "almost"}, 76 // Ideally, this test should pass. It works in firefox, but not in hunspell 77 // or OS X. 
78 // {"alot", "a lot"}, 79 {"amatuer", "amateur"}, 80 {"ammend", "amend"}, 81 {"amung", "among"}, 82 {"anually", "annually"}, 83 {"apparant", "apparent"}, 84 {"artic", "arctic"}, 85 {"arguement", "argument"}, 86 {"athiest", "atheist"}, 87 {"athelete", "athlete"}, 88 {"avrage", "average"}, 89 {"awfull", "awful"}, 90 {"ballance", "balance"}, 91 {"basicly", "basically"}, 92 {"becuase", "because"}, 93 {"becomeing", "becoming"}, 94 {"befor", "before"}, 95 {"begining", "beginning"}, 96 {"beleive", "believe"}, 97 {"bellweather", "bellwether"}, 98 {"benifit", "benefit"}, 99 {"bouy", "buoy"}, 100 {"briliant", "brilliant"}, 101 {"burgler", "burglar"}, 102 {"camoflage", "camouflage"}, 103 {"carefull", "careful"}, 104 {"Carribean", "Caribbean"}, 105 {"catagory", "category"}, 106 {"cauhgt", "caught"}, 107 {"cieling", "ceiling"}, 108 {"cemetary", "cemetery"}, 109 {"certin", "certain"}, 110 {"changable", "changeable"}, 111 {"cheif", "chief"}, 112 {"citezen", "citizen"}, 113 {"collaegue", "colleague"}, 114 {"colum", "column"}, 115 {"comming", "coming"}, 116 {"commited", "committed"}, 117 {"compitition", "competition"}, 118 {"conceed", "concede"}, 119 {"congradulate", "congratulate"}, 120 {"consciencious", "conscientious"}, 121 {"concious", "conscious"}, 122 {"concensus", "consensus"}, 123 {"contraversy", "controversy"}, 124 {"conveniance", "convenience"}, 125 {"critecize", "criticize"}, 126 {"dacquiri", "daiquiri"}, 127 {"decieve", "deceive"}, 128 {"dicide", "decide"}, 129 {"definate", "definite"}, 130 {"definitly", "definitely"}, 131 {"desparate", "desperate"}, 132 {"develope", "develop"}, 133 {"diffrence", "difference"}, 134 {"disapear", "disappear"}, 135 {"disapoint", "disappoint"}, 136 {"disasterous", "disastrous"}, 137 {"disipline", "discipline"}, 138 {"drunkeness", "drunkenness"}, 139 {"dumbell", "dumbbell"}, 140 {"easely", "easily"}, 141 {"eigth", "eight"}, 142 {"embarass", "embarrass"}, 143 {"enviroment", "environment"}, 144 {"equiped", "equipped"}, 145 {"equiptment", 
"equipment"}, 146 {"exagerate", "exaggerate"}, 147 {"exellent", "excellent"}, 148 {"exsept", "except"}, 149 {"exercize", "exercise"}, 150 {"exilerate", "exhilarate"}, 151 {"existance", "existence"}, 152 {"experiance", "experience"}, 153 {"experament", "experiment"}, 154 {"explaination", "explanation"}, 155 {"facinating", "fascinating"}, 156 {"firey", "fiery"}, 157 {"finaly", "finally"}, 158 {"flourescent", "fluorescent"}, 159 {"foriegn", "foreign"}, 160 {"fourty", "forty"}, 161 {"foreward", "forward"}, 162 {"freind", "friend"}, 163 {"fundemental", "fundamental"}, 164 {"guage", "gauge"}, 165 {"generaly", "generally"}, 166 {"goverment", "government"}, 167 {"gratefull", "grateful"}, 168 {"garantee", "guarantee"}, 169 {"guidence", "guidance"}, 170 {"happyness", "happiness"}, 171 {"harrass", "harass"}, 172 {"heighth", "height"}, 173 {"heirarchy", "hierarchy"}, 174 {"humerous", "humorous"}, 175 {"hygene", "hygiene"}, 176 {"hipocrit", "hypocrite"}, 177 {"idenity", "identity"}, 178 {"ignorence", "ignorance"}, 179 {"imaginery", "imaginary"}, 180 {"immitate", "imitate"}, 181 {"immitation", "imitation"}, 182 {"imediately", "immediately"}, 183 {"incidently", "incidentally"}, 184 {"independant", "independent"}, 185 {"indispensible", "indispensable"}, 186 {"innoculate", "inoculate"}, 187 {"inteligence", "intelligence"}, 188 {"intresting", "interesting"}, 189 {"interuption", "interruption"}, 190 {"irrelevent", "irrelevant"}, 191 {"irritible", "irritable"}, 192 {"jellous", "jealous"}, 193 {"knowlege", "knowledge"}, 194 {"labratory", "laboratory"}, 195 {"lenght", "length"}, 196 {"liason", "liaison"}, 197 {"libary", "library"}, 198 {"lisence", "license"}, 199 {"lonelyness", "loneliness"}, 200 {"lieing", "lying"}, 201 {"maintenence", "maintenance"}, 202 {"manuever", "maneuver"}, 203 {"marrige", "marriage"}, 204 {"mathmatics", "mathematics"}, 205 {"medcine", "medicine"}, 206 {"miniture", "miniature"}, 207 {"minite", "minute"}, 208 {"mischevous", "mischievous"}, 209 {"mispell", 
"misspell"}, 210 // Maybe this one should pass, as it works in hunspell, but not in firefox. 211 // {"misterius", "mysterious"}, 212 {"naturaly", "naturally"}, 213 {"neccessary", "necessary"}, 214 {"neice", "niece"}, 215 {"nieghbor", "neighbor"}, 216 {"nieghbour", "neighbor"}, 217 {"niether", "neither"}, 218 {"noticable", "noticeable"}, 219 {"occassion", "occasion"}, 220 {"occasionaly", "occasionally"}, 221 {"occurrance", "occurrence"}, 222 {"occured", "occurred"}, 223 {"ommision", "omission"}, 224 {"oppurtunity", "opportunity"}, 225 {"outragous", "outrageous"}, 226 {"parrallel", "parallel"}, 227 {"parliment", "parliament"}, 228 {"particurly", "particularly"}, 229 {"passtime", "pastime"}, 230 {"peculier", "peculiar"}, 231 {"percieve", "perceive"}, 232 {"pernament", "permanent"}, 233 {"perseverence", "perseverance"}, 234 {"personaly", "personally"}, 235 {"persaude", "persuade"}, 236 {"pichure", "picture"}, 237 {"peice", "piece"}, 238 {"plagerize", "plagiarize"}, 239 {"playright", "playwright"}, 240 {"plesant", "pleasant"}, 241 {"pollitical", "political"}, 242 {"posession", "possession"}, 243 {"potatos", "potatoes"}, 244 {"practicle", "practical"}, 245 {"preceed", "precede"}, 246 {"predjudice", "prejudice"}, 247 {"presance", "presence"}, 248 {"privelege", "privilege"}, 249 // This one should probably work. It does in FF and Hunspell. 
250 // {"probly", "probably"}, 251 {"proffesional", "professional"}, 252 {"promiss", "promise"}, 253 {"pronounciation", "pronunciation"}, 254 {"prufe", "proof"}, 255 {"psycology", "psychology"}, 256 {"publically", "publicly"}, 257 {"quanity", "quantity"}, 258 {"quarentine", "quarantine"}, 259 {"questionaire", "questionnaire"}, 260 {"readible", "readable"}, 261 {"realy", "really"}, 262 {"recieve", "receive"}, 263 {"reciept", "receipt"}, 264 {"reconize", "recognize"}, 265 {"recomend", "recommend"}, 266 {"refered", "referred"}, 267 {"referance", "reference"}, 268 {"relevent", "relevant"}, 269 {"religous", "religious"}, 270 {"repitition", "repetition"}, 271 {"restarant", "restaurant"}, 272 {"rythm", "rhythm"}, 273 {"rediculous", "ridiculous"}, 274 {"sacrefice", "sacrifice"}, 275 {"saftey", "safety"}, 276 {"sissors", "scissors"}, 277 {"secratary", "secretary"}, 278 {"seperate", "separate"}, 279 {"sargent", "sergeant"}, 280 {"shineing", "shining"}, 281 {"similer", "similar"}, 282 {"sinceerly", "sincerely"}, 283 {"speach", "speech"}, 284 {"strenght", "strength"}, 285 {"succesful", "successful"}, 286 {"supercede", "supersede"}, 287 {"surelly", "surely"}, 288 {"suprise", "surprise"}, 289 {"temperture", "temperature"}, 290 {"temprary", "temporary"}, 291 {"tommorrow", "tomorrow"}, 292 {"tounge", "tongue"}, 293 {"truely", "truly"}, 294 {"twelth", "twelfth"}, 295 {"tyrany", "tyranny"}, 296 {"underate", "underrate"}, 297 {"untill", "until"}, 298 {"unuseual", "unusual"}, 299 {"upholstry", "upholstery"}, 300 {"usible", "usable"}, 301 {"useing", "using"}, 302 {"usualy", "usually"}, 303 {"vaccuum", "vacuum"}, 304 {"vegatarian", "vegetarian"}, 305 {"vehical", "vehicle"}, 306 {"visious", "vicious"}, 307 {"villege", "village"}, 308 {"wierd", "weird"}, 309 {"wellcome", "welcome"}, 310 {"wellfare", "welfare"}, 311 {"wilfull", "willful"}, 312 {"withold", "withhold"}, 313 {"writting", "writing"}, 314 }; 315 316 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) { 317 const string16 
word(ASCIIToUTF16(kTestCases[i].input)); 318 EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, 0)) << word; 319 320 // Check if the suggested words occur. 321 std::vector<string16> suggestions; 322 SpellCheckerPlatform::FillSuggestionList(word, &suggestions); 323 bool suggested_word_is_present = false; 324 const string16 suggested_word(ASCIIToUTF16(kTestCases[i].suggested_word)); 325 for (size_t j = 0; j < suggestions.size(); j++) { 326 if (suggestions[j].compare(suggested_word) == 0) { 327 suggested_word_is_present = true; 328 break; 329 } 330 } 331 EXPECT_TRUE(suggested_word_is_present) << suggested_word; 332 } 333} 334