spellchecker_platform_engine_unittest.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/string_util.h"
6#include "chrome/browser/spellchecker_platform_engine.h"
7#include "testing/gtest/include/gtest/gtest.h"
8
9// Tests that words are properly ignored. Currently only enabled on OS X as it
10// is the only platform to support ignoring words. Note that in this test, we
11// supply a non-zero doc_tag, in order to test that ignored words are matched to
12// the correct document.
13TEST(PlatformSpellCheckTest, IgnoreWords_EN_US) {
14  const char* kTestCases[] = {
15    "teh",
16    "morblier",
17    "watre",
18    "noooen",
19  };
20
21  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
22    const string16 word(ASCIIToUTF16(kTestCases[i]));
23    const int doc_tag = SpellCheckerPlatform::GetDocumentTag();
24
25    // The word should show up as misspelled.
26    EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
27
28    // Ignore the word.
29    SpellCheckerPlatform::IgnoreWord(word);
30
31    // The word should now show up as correctly spelled.
32    EXPECT_TRUE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
33
34    // Close the docuemnt. Any words that we had previously ignored should no
35    // longer be ignored and thus should show up as misspelled.
36    SpellCheckerPlatform::CloseDocumentWithTag(doc_tag);
37
38    // The word should now show be spelled wrong again
39    EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
40  }
41}  // Test IgnoreWords_EN_US
42
43TEST(PlatformSpellCheckTest, SpellCheckSuggestions_EN_US) {
44  static const struct {
45    const char* input;           // A string to be tested.
46    const char* suggested_word;  // A suggested word that should occur.
47  } kTestCases[] = {
48    // We need to have separate test cases here, since hunspell and the OS X
49    // spellchecking service occasionally differ on what they consider a valid
50    // suggestion for a given word, although these lists could likely be
51    // integrated somewhat. The test cases for non-Mac are in
52    // chrome/renderer/spellcheck_unittest.cc
53    // These words come from the wikipedia page of the most commonly
54    // misspelled words in english.
55    // (http://en.wikipedia.org/wiki/Commonly_misspelled_words).
56    // However, 10.6 loads multiple dictionaries and enables many non-English
57    // dictionaries by default. As a result, we have removed from the list any
58    // word that is marked as correct because it is correct in another
59    // language.
60    {"absense", "absence"},
61    {"acceptible", "acceptable"},
62    {"accidentaly", "accidentally"},
63    {"acheive", "achieve"},
64    {"acknowlege", "acknowledge"},
65    {"acquaintence", "acquaintance"},
66    {"aquire", "acquire"},
67    {"aquit", "acquit"},
68    {"acrage", "acreage"},
69    {"adultary", "adultery"},
70    {"advertize", "advertise"},
71    {"adviseable", "advisable"},
72    {"alchohol", "alcohol"},
73    {"alege", "allege"},
74    {"allegaince", "allegiance"},
75    {"allmost", "almost"},
76    // Ideally, this test should pass. It works in firefox, but not in hunspell
77    // or OS X.
78    // {"alot", "a lot"},
79    {"amatuer", "amateur"},
80    {"ammend", "amend"},
81    {"amung", "among"},
82    {"anually", "annually"},
83    {"apparant", "apparent"},
84    {"artic", "arctic"},
85    {"arguement", "argument"},
86    {"athiest", "atheist"},
87    {"athelete", "athlete"},
88    {"avrage", "average"},
89    {"awfull", "awful"},
90    {"ballance", "balance"},
91    {"basicly", "basically"},
92    {"becuase", "because"},
93    {"becomeing", "becoming"},
94    {"befor", "before"},
95    {"begining", "beginning"},
96    {"beleive", "believe"},
97    {"bellweather", "bellwether"},
98    {"benifit", "benefit"},
99    {"bouy", "buoy"},
100    {"briliant", "brilliant"},
101    {"burgler", "burglar"},
102    {"camoflage", "camouflage"},
103    {"carefull", "careful"},
104    {"Carribean", "Caribbean"},
105    {"catagory", "category"},
106    {"cauhgt", "caught"},
107    {"cieling", "ceiling"},
108    {"cemetary", "cemetery"},
109    {"certin", "certain"},
110    {"changable", "changeable"},
111    {"cheif", "chief"},
112    {"citezen", "citizen"},
113    {"collaegue", "colleague"},
114    {"colum", "column"},
115    {"comming", "coming"},
116    {"commited", "committed"},
117    {"compitition", "competition"},
118    {"conceed", "concede"},
119    {"congradulate", "congratulate"},
120    {"consciencious", "conscientious"},
121    {"concious", "conscious"},
122    {"concensus", "consensus"},
123    {"contraversy", "controversy"},
124    {"conveniance", "convenience"},
125    {"critecize", "criticize"},
126    {"dacquiri", "daiquiri"},
127    {"decieve", "deceive"},
128    {"dicide", "decide"},
129    {"definate", "definite"},
130    {"definitly", "definitely"},
131    {"desparate", "desperate"},
132    {"develope", "develop"},
133    {"diffrence", "difference"},
134    {"disapear", "disappear"},
135    {"disapoint", "disappoint"},
136    {"disasterous", "disastrous"},
137    {"disipline", "discipline"},
138    {"drunkeness", "drunkenness"},
139    {"dumbell", "dumbbell"},
140    {"easely", "easily"},
141    {"eigth", "eight"},
142    {"embarass", "embarrass"},
143    {"enviroment", "environment"},
144    {"equiped", "equipped"},
145    {"equiptment", "equipment"},
146    {"exagerate", "exaggerate"},
147    {"exellent", "excellent"},
148    {"exsept", "except"},
149    {"exercize", "exercise"},
150    {"exilerate", "exhilarate"},
151    {"existance", "existence"},
152    {"experiance", "experience"},
153    {"experament", "experiment"},
154    {"explaination", "explanation"},
155    {"facinating", "fascinating"},
156    {"firey", "fiery"},
157    {"finaly", "finally"},
158    {"flourescent", "fluorescent"},
159    {"foriegn", "foreign"},
160    {"fourty", "forty"},
161    {"foreward", "forward"},
162    {"freind", "friend"},
163    {"fundemental", "fundamental"},
164    {"guage", "gauge"},
165    {"generaly", "generally"},
166    {"goverment", "government"},
167    {"gratefull", "grateful"},
168    {"garantee", "guarantee"},
169    {"guidence", "guidance"},
170    {"happyness", "happiness"},
171    {"harrass", "harass"},
172    {"heighth", "height"},
173    {"heirarchy", "hierarchy"},
174    {"humerous", "humorous"},
175    {"hygene", "hygiene"},
176    {"hipocrit", "hypocrite"},
177    {"idenity", "identity"},
178    {"ignorence", "ignorance"},
179    {"imaginery", "imaginary"},
180    {"immitate", "imitate"},
181    {"immitation", "imitation"},
182    {"imediately", "immediately"},
183    {"incidently", "incidentally"},
184    {"independant", "independent"},
185    {"indispensible", "indispensable"},
186    {"innoculate", "inoculate"},
187    {"inteligence", "intelligence"},
188    {"intresting", "interesting"},
189    {"interuption", "interruption"},
190    {"irrelevent", "irrelevant"},
191    {"irritible", "irritable"},
192    {"jellous", "jealous"},
193    {"knowlege", "knowledge"},
194    {"labratory", "laboratory"},
195    {"lenght", "length"},
196    {"liason", "liaison"},
197    {"libary", "library"},
198    {"lisence", "license"},
199    {"lonelyness", "loneliness"},
200    {"lieing", "lying"},
201    {"maintenence", "maintenance"},
202    {"manuever", "maneuver"},
203    {"marrige", "marriage"},
204    {"mathmatics", "mathematics"},
205    {"medcine", "medicine"},
206    {"miniture", "miniature"},
207    {"minite", "minute"},
208    {"mischevous", "mischievous"},
209    {"mispell", "misspell"},
210    // Maybe this one should pass, as it works in hunspell, but not in firefox.
211    // {"misterius", "mysterious"},
212    {"naturaly", "naturally"},
213    {"neccessary", "necessary"},
214    {"neice", "niece"},
215    {"nieghbor", "neighbor"},
216    {"nieghbour", "neighbor"},
217    {"niether", "neither"},
218    {"noticable", "noticeable"},
219    {"occassion", "occasion"},
220    {"occasionaly", "occasionally"},
221    {"occurrance", "occurrence"},
222    {"occured", "occurred"},
223    {"ommision", "omission"},
224    {"oppurtunity", "opportunity"},
225    {"outragous", "outrageous"},
226    {"parrallel", "parallel"},
227    {"parliment", "parliament"},
228    {"particurly", "particularly"},
229    {"passtime", "pastime"},
230    {"peculier", "peculiar"},
231    {"percieve", "perceive"},
232    {"pernament", "permanent"},
233    {"perseverence", "perseverance"},
234    {"personaly", "personally"},
235    {"persaude", "persuade"},
236    {"pichure", "picture"},
237    {"peice", "piece"},
238    {"plagerize", "plagiarize"},
239    {"playright", "playwright"},
240    {"plesant", "pleasant"},
241    {"pollitical", "political"},
242    {"posession", "possession"},
243    {"potatos", "potatoes"},
244    {"practicle", "practical"},
245    {"preceed", "precede"},
246    {"predjudice", "prejudice"},
247    {"presance", "presence"},
248    {"privelege", "privilege"},
249    // This one should probably work. It does in FF and Hunspell.
250    // {"probly", "probably"},
251    {"proffesional", "professional"},
252    {"promiss", "promise"},
253    {"pronounciation", "pronunciation"},
254    {"prufe", "proof"},
255    {"psycology", "psychology"},
256    {"publically", "publicly"},
257    {"quanity", "quantity"},
258    {"quarentine", "quarantine"},
259    {"questionaire", "questionnaire"},
260    {"readible", "readable"},
261    {"realy", "really"},
262    {"recieve", "receive"},
263    {"reciept", "receipt"},
264    {"reconize", "recognize"},
265    {"recomend", "recommend"},
266    {"refered", "referred"},
267    {"referance", "reference"},
268    {"relevent", "relevant"},
269    {"religous", "religious"},
270    {"repitition", "repetition"},
271    {"restarant", "restaurant"},
272    {"rythm", "rhythm"},
273    {"rediculous", "ridiculous"},
274    {"sacrefice", "sacrifice"},
275    {"saftey", "safety"},
276    {"sissors", "scissors"},
277    {"secratary", "secretary"},
278    {"seperate", "separate"},
279    {"sargent", "sergeant"},
280    {"shineing", "shining"},
281    {"similer", "similar"},
282    {"sinceerly", "sincerely"},
283    {"speach", "speech"},
284    {"strenght", "strength"},
285    {"succesful", "successful"},
286    {"supercede", "supersede"},
287    {"surelly", "surely"},
288    {"suprise", "surprise"},
289    {"temperture", "temperature"},
290    {"temprary", "temporary"},
291    {"tommorrow", "tomorrow"},
292    {"tounge", "tongue"},
293    {"truely", "truly"},
294    {"twelth", "twelfth"},
295    {"tyrany", "tyranny"},
296    {"underate", "underrate"},
297    {"untill", "until"},
298    {"unuseual", "unusual"},
299    {"upholstry", "upholstery"},
300    {"usible", "usable"},
301    {"useing", "using"},
302    {"usualy", "usually"},
303    {"vaccuum", "vacuum"},
304    {"vegatarian", "vegetarian"},
305    {"vehical", "vehicle"},
306    {"visious", "vicious"},
307    {"villege", "village"},
308    {"wierd", "weird"},
309    {"wellcome", "welcome"},
310    {"wellfare", "welfare"},
311    {"wilfull", "willful"},
312    {"withold", "withhold"},
313    {"writting", "writing"},
314  };
315
316  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
317    const string16 word(ASCIIToUTF16(kTestCases[i].input));
318    EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, 0)) << word;
319
320    // Check if the suggested words occur.
321    std::vector<string16> suggestions;
322    SpellCheckerPlatform::FillSuggestionList(word, &suggestions);
323    bool suggested_word_is_present = false;
324    const string16 suggested_word(ASCIIToUTF16(kTestCases[i].suggested_word));
325    for (size_t j = 0; j < suggestions.size(); j++) {
326      if (suggestions[j].compare(suggested_word) == 0) {
327        suggested_word_is_present = true;
328        break;
329      }
330    }
331    EXPECT_TRUE(suggested_word_is_present) << suggested_word;
332  }
333}
334