1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/string_util.h"
6#include "base/utf_string_conversions.h"
7#include "chrome/browser/spellchecker_platform_engine.h"
8#include "testing/gtest/include/gtest/gtest.h"
9
10// Tests that words are properly ignored. Currently only enabled on OS X as it
11// is the only platform to support ignoring words. Note that in this test, we
12// supply a non-zero doc_tag, in order to test that ignored words are matched to
13// the correct document.
14TEST(PlatformSpellCheckTest, IgnoreWords_EN_US) {
15  const char* kTestCases[] = {
16    "teh",
17    "morblier",
18    "watre",
19    "noooen",
20  };
21
22  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
23    const string16 word(ASCIIToUTF16(kTestCases[i]));
24    const int doc_tag = SpellCheckerPlatform::GetDocumentTag();
25
26    // The word should show up as misspelled.
27    EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
28
29    // Ignore the word.
30    SpellCheckerPlatform::IgnoreWord(word);
31
32    // The word should now show up as correctly spelled.
33    EXPECT_TRUE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
34
35    // Close the docuemnt. Any words that we had previously ignored should no
36    // longer be ignored and thus should show up as misspelled.
37    SpellCheckerPlatform::CloseDocumentWithTag(doc_tag);
38
39    // The word should now show be spelled wrong again
40    EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
41  }
42}  // Test IgnoreWords_EN_US
43
44TEST(PlatformSpellCheckTest, SpellCheckSuggestions_EN_US) {
45  static const struct {
46    const char* input;           // A string to be tested.
47    const char* suggested_word;  // A suggested word that should occur.
48  } kTestCases[] = {
49    // We need to have separate test cases here, since hunspell and the OS X
50    // spellchecking service occasionally differ on what they consider a valid
51    // suggestion for a given word, although these lists could likely be
52    // integrated somewhat. The test cases for non-Mac are in
53    // chrome/renderer/spellcheck_unittest.cc
54    // These words come from the wikipedia page of the most commonly
55    // misspelled words in english.
56    // (http://en.wikipedia.org/wiki/Commonly_misspelled_words).
57    // However, 10.6 loads multiple dictionaries and enables many non-English
58    // dictionaries by default. As a result, we have removed from the list any
59    // word that is marked as correct because it is correct in another
60    // language.
61    {"absense", "absence"},
62    {"acceptible", "acceptable"},
63    {"accidentaly", "accidentally"},
64    {"acheive", "achieve"},
65    {"acknowlege", "acknowledge"},
66    {"acquaintence", "acquaintance"},
67    {"aquire", "acquire"},
68    {"aquit", "acquit"},
69    {"acrage", "acreage"},
70    {"adultary", "adultery"},
71    {"advertize", "advertise"},
72    {"adviseable", "advisable"},
73    {"alchohol", "alcohol"},
74    {"alege", "allege"},
75    {"allegaince", "allegiance"},
76    {"allmost", "almost"},
77    // Ideally, this test should pass. It works in firefox, but not in hunspell
78    // or OS X.
79    // {"alot", "a lot"},
80    {"amatuer", "amateur"},
81    {"ammend", "amend"},
82    {"amung", "among"},
83    {"anually", "annually"},
84    {"apparant", "apparent"},
85    {"artic", "arctic"},
86    {"arguement", "argument"},
87    {"athiest", "atheist"},
88    {"athelete", "athlete"},
89    {"avrage", "average"},
90    {"awfull", "awful"},
91    {"ballance", "balance"},
92    {"basicly", "basically"},
93    {"becuase", "because"},
94    {"becomeing", "becoming"},
95    {"befor", "before"},
96    {"begining", "beginning"},
97    {"beleive", "believe"},
98    {"bellweather", "bellwether"},
99    {"benifit", "benefit"},
100    {"bouy", "buoy"},
101    {"briliant", "brilliant"},
102    {"burgler", "burglar"},
103    {"camoflage", "camouflage"},
104    {"carefull", "careful"},
105    {"Carribean", "Caribbean"},
106    {"catagory", "category"},
107    {"cauhgt", "caught"},
108    {"cieling", "ceiling"},
109    {"cemetary", "cemetery"},
110    {"certin", "certain"},
111    {"changable", "changeable"},
112    {"cheif", "chief"},
113    {"citezen", "citizen"},
114    {"collaegue", "colleague"},
115    {"colum", "column"},
116    {"comming", "coming"},
117    {"commited", "committed"},
118    {"compitition", "competition"},
119    {"conceed", "concede"},
120    {"congradulate", "congratulate"},
121    {"consciencious", "conscientious"},
122    {"concious", "conscious"},
123    {"concensus", "consensus"},
124    {"contraversy", "controversy"},
125    {"conveniance", "convenience"},
126    {"critecize", "criticize"},
127    {"dacquiri", "daiquiri"},
128    {"decieve", "deceive"},
129    {"dicide", "decide"},
130    {"definate", "definite"},
131    {"definitly", "definitely"},
132    {"desparate", "desperate"},
133    {"develope", "develop"},
134    {"diffrence", "difference"},
135    {"disapear", "disappear"},
136    {"disapoint", "disappoint"},
137    {"disasterous", "disastrous"},
138    {"disipline", "discipline"},
139    {"drunkeness", "drunkenness"},
140    {"dumbell", "dumbbell"},
141    {"easely", "easily"},
142    {"eigth", "eight"},
143    {"embarass", "embarrass"},
144    {"enviroment", "environment"},
145    {"equiped", "equipped"},
146    {"equiptment", "equipment"},
147    {"exagerate", "exaggerate"},
148    {"exellent", "excellent"},
149    {"exsept", "except"},
150    {"exercize", "exercise"},
151    {"exilerate", "exhilarate"},
152    {"existance", "existence"},
153    {"experiance", "experience"},
154    {"experament", "experiment"},
155    {"explaination", "explanation"},
156    {"facinating", "fascinating"},
157    {"firey", "fiery"},
158    {"finaly", "finally"},
159    {"flourescent", "fluorescent"},
160    {"foriegn", "foreign"},
161    {"fourty", "forty"},
162    {"foreward", "forward"},
163    {"freind", "friend"},
164    {"fundemental", "fundamental"},
165    {"guage", "gauge"},
166    {"generaly", "generally"},
167    {"goverment", "government"},
168    {"gratefull", "grateful"},
169    {"garantee", "guarantee"},
170    {"guidence", "guidance"},
171    {"happyness", "happiness"},
172    {"harrass", "harass"},
173    {"heighth", "height"},
174    {"heirarchy", "hierarchy"},
175    {"humerous", "humorous"},
176    {"hygene", "hygiene"},
177    {"hipocrit", "hypocrite"},
178    {"idenity", "identity"},
179    {"ignorence", "ignorance"},
180    {"imaginery", "imaginary"},
181    {"immitate", "imitate"},
182    {"immitation", "imitation"},
183    {"imediately", "immediately"},
184    {"incidently", "incidentally"},
185    {"independant", "independent"},
186    {"indispensible", "indispensable"},
187    {"innoculate", "inoculate"},
188    {"inteligence", "intelligence"},
189    {"intresting", "interesting"},
190    {"interuption", "interruption"},
191    {"irrelevent", "irrelevant"},
192    {"irritible", "irritable"},
193    {"jellous", "jealous"},
194    {"knowlege", "knowledge"},
195    {"labratory", "laboratory"},
196    {"lenght", "length"},
197    {"liason", "liaison"},
198    {"libary", "library"},
199    {"lisence", "license"},
200    {"lonelyness", "loneliness"},
201    {"lieing", "lying"},
202    {"maintenence", "maintenance"},
203    {"manuever", "maneuver"},
204    {"marrige", "marriage"},
205    {"mathmatics", "mathematics"},
206    {"medcine", "medicine"},
207    {"miniture", "miniature"},
208    {"minite", "minute"},
209    {"mischevous", "mischievous"},
210    {"mispell", "misspell"},
211    // Maybe this one should pass, as it works in hunspell, but not in firefox.
212    // {"misterius", "mysterious"},
213    {"naturaly", "naturally"},
214    {"neccessary", "necessary"},
215    {"neice", "niece"},
216    {"nieghbor", "neighbor"},
217    {"nieghbour", "neighbor"},
218    {"niether", "neither"},
219    {"noticable", "noticeable"},
220    {"occassion", "occasion"},
221    {"occasionaly", "occasionally"},
222    {"occurrance", "occurrence"},
223    {"occured", "occurred"},
224    {"ommision", "omission"},
225    {"oppurtunity", "opportunity"},
226    {"outragous", "outrageous"},
227    {"parrallel", "parallel"},
228    {"parliment", "parliament"},
229    {"particurly", "particularly"},
230    {"passtime", "pastime"},
231    {"peculier", "peculiar"},
232    {"percieve", "perceive"},
233    {"pernament", "permanent"},
234    {"perseverence", "perseverance"},
235    {"personaly", "personally"},
236    {"persaude", "persuade"},
237    {"pichure", "picture"},
238    {"peice", "piece"},
239    {"plagerize", "plagiarize"},
240    {"playright", "playwright"},
241    {"plesant", "pleasant"},
242    {"pollitical", "political"},
243    {"posession", "possession"},
244    {"potatos", "potatoes"},
245    {"practicle", "practical"},
246    {"preceed", "precede"},
247    {"predjudice", "prejudice"},
248    {"presance", "presence"},
249    {"privelege", "privilege"},
250    // This one should probably work. It does in FF and Hunspell.
251    // {"probly", "probably"},
252    {"proffesional", "professional"},
253    {"promiss", "promise"},
254    {"pronounciation", "pronunciation"},
255    {"prufe", "proof"},
256    {"psycology", "psychology"},
257    {"publically", "publicly"},
258    {"quanity", "quantity"},
259    {"quarentine", "quarantine"},
260    {"questionaire", "questionnaire"},
261    {"readible", "readable"},
262    {"realy", "really"},
263    {"recieve", "receive"},
264    {"reciept", "receipt"},
265    {"reconize", "recognize"},
266    {"recomend", "recommend"},
267    {"refered", "referred"},
268    {"referance", "reference"},
269    {"relevent", "relevant"},
270    {"religous", "religious"},
271    {"repitition", "repetition"},
272    {"restarant", "restaurant"},
273    {"rythm", "rhythm"},
274    {"rediculous", "ridiculous"},
275    {"sacrefice", "sacrifice"},
276    {"saftey", "safety"},
277    {"sissors", "scissors"},
278    {"secratary", "secretary"},
279    {"seperate", "separate"},
280    {"sargent", "sergeant"},
281    {"shineing", "shining"},
282    {"similer", "similar"},
283    {"sinceerly", "sincerely"},
284    {"speach", "speech"},
285    {"strenght", "strength"},
286    {"succesful", "successful"},
287    {"supercede", "supersede"},
288    {"surelly", "surely"},
289    {"suprise", "surprise"},
290    {"temperture", "temperature"},
291    {"temprary", "temporary"},
292    {"tommorrow", "tomorrow"},
293    {"tounge", "tongue"},
294    {"truely", "truly"},
295    {"twelth", "twelfth"},
296    {"tyrany", "tyranny"},
297    {"underate", "underrate"},
298    {"untill", "until"},
299    {"unuseual", "unusual"},
300    {"upholstry", "upholstery"},
301    {"usible", "usable"},
302    {"useing", "using"},
303    {"usualy", "usually"},
304    {"vaccuum", "vacuum"},
305    {"vegatarian", "vegetarian"},
306    {"vehical", "vehicle"},
307    {"visious", "vicious"},
308    {"villege", "village"},
309    {"wierd", "weird"},
310    {"wellcome", "welcome"},
311    {"wellfare", "welfare"},
312    {"wilfull", "willful"},
313    {"withold", "withhold"},
314    {"writting", "writing"},
315  };
316
317  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
318    const string16 word(ASCIIToUTF16(kTestCases[i].input));
319    EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, 0)) << word;
320
321    // Check if the suggested words occur.
322    std::vector<string16> suggestions;
323    SpellCheckerPlatform::FillSuggestionList(word, &suggestions);
324    bool suggested_word_is_present = false;
325    const string16 suggested_word(ASCIIToUTF16(kTestCases[i].suggested_word));
326    for (size_t j = 0; j < suggestions.size(); j++) {
327      if (suggestions[j].compare(suggested_word) == 0) {
328        suggested_word_is_present = true;
329        break;
330      }
331    }
332    EXPECT_TRUE(suggested_word_is_present) << suggested_word;
333  }
334}
335