1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include <algorithm>

#include "base/files/file_util.h"
#include "base/message_loop/message_loop.h"
#include "base/path_service.h"
#include "base/strings/sys_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/common/spellcheck_common.h"
#include "chrome/common/spellcheck_result.h"
#include "chrome/renderer/spellchecker/hunspell_engine.h"
#include "chrome/renderer/spellchecker/spellcheck.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/WebKit/public/web/WebTextCheckingCompletion.h"
#include "third_party/WebKit/public/web/WebTextCheckingResult.h"
18
19namespace {
20
21base::FilePath GetHunspellDirectory() {
22  base::FilePath hunspell_directory;
23  if (!PathService::Get(base::DIR_SOURCE_ROOT, &hunspell_directory))
24    return base::FilePath();
25
26  hunspell_directory = hunspell_directory.AppendASCII("third_party");
27  hunspell_directory = hunspell_directory.AppendASCII("hunspell_dictionaries");
28  return hunspell_directory;
29}
30
31}  // namespace
32
33// TODO(groby): This needs to be a BrowserTest for OSX.
34class SpellCheckTest : public testing::Test {
35 public:
36  SpellCheckTest() {
37    ReinitializeSpellCheck("en-US");
38  }
39
40  void ReinitializeSpellCheck(const std::string& language) {
41    spell_check_.reset(new SpellCheck());
42    InitializeSpellCheck(language);
43  }
44
45  void UninitializeSpellCheck() {
46    spell_check_.reset(new SpellCheck());
47  }
48
49  bool InitializeIfNeeded() {
50    return spell_check()->InitializeIfNeeded();
51  }
52
53  void InitializeSpellCheck(const std::string& language) {
54    base::FilePath hunspell_directory = GetHunspellDirectory();
55    EXPECT_FALSE(hunspell_directory.empty());
56    base::File file(
57        chrome::spellcheck_common::GetVersionedFileName(language,
58                                                        hunspell_directory),
59        base::File::FLAG_OPEN | base::File::FLAG_READ);
60#if defined(OS_MACOSX)
61    // TODO(groby): Forcing spellcheck to use hunspell, even on OSX.
62    // Instead, tests should exercise individual spelling engines.
63    spell_check_->spellcheck_.platform_spelling_engine_.reset(
64        new HunspellEngine);
65#endif
66    spell_check_->Init(file.Pass(), std::set<std::string>(), language);
67  }
68
69  void EnableAutoCorrect(bool enable_autocorrect) {
70    spell_check_->OnEnableAutoSpellCorrect(enable_autocorrect);
71  }
72
73  virtual ~SpellCheckTest() {
74  }
75
76  SpellCheck* spell_check() { return spell_check_.get(); }
77
78  bool CheckSpelling(const std::string& word, int tag) {
79    return spell_check_->spellcheck_.platform_spelling_engine_->CheckSpelling(
80        base::ASCIIToUTF16(word), tag);
81  }
82
83#if !defined(OS_MACOSX)
84 protected:
85  void TestSpellCheckParagraph(
86      const base::string16& input,
87      const std::vector<SpellCheckResult>& expected) {
88    blink::WebVector<blink::WebTextCheckingResult> results;
89    spell_check()->SpellCheckParagraph(input,
90                                       &results);
91
92    EXPECT_EQ(results.size(), expected.size());
93    size_t size = std::min(results.size(), expected.size());
94    for (size_t j = 0; j < size; ++j) {
95      EXPECT_EQ(results[j].decoration, blink::WebTextDecorationTypeSpelling);
96      EXPECT_EQ(results[j].location, expected[j].location);
97      EXPECT_EQ(results[j].length, expected[j].length);
98    }
99  }
100#endif
101
102 private:
103  scoped_ptr<SpellCheck> spell_check_;
104  base::MessageLoop loop;
105};
106
107// A fake completion object for verification.
108class MockTextCheckingCompletion : public blink::WebTextCheckingCompletion {
109 public:
110  MockTextCheckingCompletion()
111      : completion_count_(0) {
112  }
113
114  virtual void didFinishCheckingText(
115      const blink::WebVector<blink::WebTextCheckingResult>& results)
116          OVERRIDE {
117    completion_count_++;
118    last_results_ = results;
119  }
120
121  virtual void didCancelCheckingText() OVERRIDE {
122    completion_count_++;
123  }
124
125  size_t completion_count_;
126  blink::WebVector<blink::WebTextCheckingResult> last_results_;
127};
128
// Runs unit tests for the content::SpellCheck::SpellCheckWord() function
// with the US English dictionary.
131// The unit tests in this function consist of:
132//   * Tests for the function with empty strings;
133//   * Tests for the function with a valid English word;
134//   * Tests for the function with a valid non-English word;
135//   * Tests for the function with a valid English word with a preceding
136//     space character;
137//   * Tests for the function with a valid English word with a preceding
138//     non-English word;
139//   * Tests for the function with a valid English word with a following
140//     space character;
141//   * Tests for the function with a valid English word with a following
142//     non-English word;
143//   * Tests for the function with two valid English words concatenated
144//     with space characters or non-English words;
145//   * Tests for the function with an invalid English word;
146//   * Tests for the function with an invalid English word with a preceding
147//     space character;
148//   * Tests for the function with an invalid English word with a preceding
149//     non-English word;
150//   * Tests for the function with an invalid English word with a following
151//     space character;
152//   * Tests for the function with an invalid English word with a following
153//     non-English word, and;
154//   * Tests for the function with two invalid English words concatenated
155//     with space characters or non-English words.
// A test marked "[ROBUSTNESS]" is a robustness test that uses a
// grammatically incorrect string.
158// TODO(groby): Please feel free to add more tests.
159TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) {
160  static const struct {
161    // A string to be tested.
162    const wchar_t* input;
163    // An expected result for this test case.
164    //   * true: the input string does not have any invalid words.
165    //   * false: the input string has one or more invalid words.
166    bool expected_result;
167    // The position and the length of the first invalid word.
168    int misspelling_start;
169    int misspelling_length;
170  } kTestCases[] = {
171    // Empty strings.
172    {L"", true},
173    {L" ", true},
174    {L"\xA0", true},
175    {L"\x3000", true},
176
177    // A valid English word "hello".
178    {L"hello", true},
179    // A valid Chinese word (meaning "hello") consisting of two CJKV
180    // ideographs
181    {L"\x4F60\x597D", true},
182    // A valid Korean word (meaning "hello") consisting of five hangul
183    // syllables
184    {L"\xC548\xB155\xD558\xC138\xC694", true},
185    // A valid Japanese word (meaning "hello") consisting of five Hiragana
186    // letters
187    {L"\x3053\x3093\x306B\x3061\x306F", true},
188    // A valid Hindi word (meaning ?) consisting of six Devanagari letters
189    // (This word is copied from "http://b/issue?id=857583".)
190    {L"\x0930\x093E\x091C\x0927\x093E\x0928", true},
191    // A valid English word "affix" using a Latin ligature 'ffi'
192    {L"a\xFB03x", true},
193    // A valid English word "hello" (fullwidth version)
194    {L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true},
195    // Two valid Greek words (meaning "hello") consisting of seven Greek
196    // letters
197    {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true},
198    // A valid Russian word (meaning "hello") consisting of twelve Cyrillic
199    // letters
200    {L"\x0437\x0434\x0440\x0430\x0432\x0441"
201     L"\x0442\x0432\x0443\x0439\x0442\x0435", true},
202    // A valid English contraction
203    {L"isn't", true},
204    // A valid English word enclosed with underscores.
205    {L"_hello_", true},
206
207    // A valid English word with a preceding whitespace
208    {L" " L"hello", true},
209    // A valid English word with a preceding no-break space
210    {L"\xA0" L"hello", true},
211    // A valid English word with a preceding ideographic space
212    {L"\x3000" L"hello", true},
213    // A valid English word with a preceding Chinese word
214    {L"\x4F60\x597D" L"hello", true},
215    // [ROBUSTNESS] A valid English word with a preceding Korean word
216    {L"\xC548\xB155\xD558\xC138\xC694" L"hello", true},
217    // A valid English word with a preceding Japanese word
218    {L"\x3053\x3093\x306B\x3061\x306F" L"hello", true},
219    // [ROBUSTNESS] A valid English word with a preceding Hindi word
220    {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello", true},
221    // [ROBUSTNESS] A valid English word with two preceding Greek words
222    {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
223     L"hello", true},
224    // [ROBUSTNESS] A valid English word with a preceding Russian word
225    {L"\x0437\x0434\x0440\x0430\x0432\x0441"
226     L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true},
227
228    // A valid English word with a following whitespace
229    {L"hello" L" ", true},
230    // A valid English word with a following no-break space
231    {L"hello" L"\xA0", true},
232    // A valid English word with a following ideographic space
233    {L"hello" L"\x3000", true},
234    // A valid English word with a following Chinese word
235    {L"hello" L"\x4F60\x597D", true},
236    // [ROBUSTNESS] A valid English word with a following Korean word
237    {L"hello" L"\xC548\xB155\xD558\xC138\xC694", true},
238    // A valid English word with a following Japanese word
239    {L"hello" L"\x3053\x3093\x306B\x3061\x306F", true},
240    // [ROBUSTNESS] A valid English word with a following Hindi word
241    {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928", true},
242    // [ROBUSTNESS] A valid English word with two following Greek words
243    {L"hello"
244     L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true},
245    // [ROBUSTNESS] A valid English word with a following Russian word
246    {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441"
247     L"\x0442\x0432\x0443\x0439\x0442\x0435", true},
248
249    // Two valid English words concatenated with a whitespace
250    {L"hello" L" " L"hello", true},
251    // Two valid English words concatenated with a no-break space
252    {L"hello" L"\xA0" L"hello", true},
253    // Two valid English words concatenated with an ideographic space
254    {L"hello" L"\x3000" L"hello", true},
255    // Two valid English words concatenated with a Chinese word
256    {L"hello" L"\x4F60\x597D" L"hello", true},
257    // [ROBUSTNESS] Two valid English words concatenated with a Korean word
258    {L"hello" L"\xC548\xB155\xD558\xC138\xC694" L"hello", true},
259    // Two valid English words concatenated with a Japanese word
260    {L"hello" L"\x3053\x3093\x306B\x3061\x306F" L"hello", true},
261    // [ROBUSTNESS] Two valid English words concatenated with a Hindi word
262    {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello" , true},
263    // [ROBUSTNESS] Two valid English words concatenated with two Greek words
264    {L"hello" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
265     L"hello", true},
266    // [ROBUSTNESS] Two valid English words concatenated with a Russian word
267    {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441"
268     L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true},
269    // [ROBUSTNESS] Two valid English words concatenated with a contraction
270    // character.
271    {L"hello:hello", true},
272
273    // An invalid English word
274    {L"ifmmp", false, 0, 5},
275    // An invalid English word "bffly" containing a Latin ligature 'ffl'
276    {L"b\xFB04y", false, 0, 3},
277    // An invalid English word "ifmmp" (fullwidth version)
278    {L"\xFF29\xFF46\xFF4D\xFF4D\xFF50", false, 0, 5},
279    // An invalid English contraction
280    {L"jtm'u", false, 0, 5},
281    // An invalid English word enclosed with underscores.
282    {L"_ifmmp_", false, 1, 5},
283
284    // An invalid English word with a preceding whitespace
285    {L" " L"ifmmp", false, 1, 5},
286    // An invalid English word with a preceding no-break space
287    {L"\xA0" L"ifmmp", false, 1, 5},
288    // An invalid English word with a preceding ideographic space
289    {L"\x3000" L"ifmmp", false, 1, 5},
290    // An invalid English word with a preceding Chinese word
291    {L"\x4F60\x597D" L"ifmmp", false, 2, 5},
292    // [ROBUSTNESS] An invalid English word with a preceding Korean word
293    {L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 5, 5},
294    // An invalid English word with a preceding Japanese word
295    {L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 5, 5},
296    // [ROBUSTNESS] An invalid English word with a preceding Hindi word
297    {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp", false, 6, 5},
298    // [ROBUSTNESS] An invalid English word with two preceding Greek words
299    {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
300     L"ifmmp", false, 8, 5},
301    // [ROBUSTNESS] An invalid English word with a preceding Russian word
302    {L"\x0437\x0434\x0440\x0430\x0432\x0441"
303     L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 12, 5},
304
305    // An invalid English word with a following whitespace
306    {L"ifmmp" L" ", false, 0, 5},
307    // An invalid English word with a following no-break space
308    {L"ifmmp" L"\xA0", false, 0, 5},
309    // An invalid English word with a following ideographic space
310    {L"ifmmp" L"\x3000", false, 0, 5},
311    // An invalid English word with a following Chinese word
312    {L"ifmmp" L"\x4F60\x597D", false, 0, 5},
313    // [ROBUSTNESS] An invalid English word with a following Korean word
314    {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694", false, 0, 5},
315    // An invalid English word with a following Japanese word
316    {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F", false, 0, 5},
317    // [ROBUSTNESS] An invalid English word with a following Hindi word
318    {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928", false, 0, 5},
319    // [ROBUSTNESS] An invalid English word with two following Greek words
320    {L"ifmmp"
321     L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", false, 0, 5},
322    // [ROBUSTNESS] An invalid English word with a following Russian word
323    {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441"
324     L"\x0442\x0432\x0443\x0439\x0442\x0435", false, 0, 5},
325
326    // Two invalid English words concatenated with a whitespace
327    {L"ifmmp" L" " L"ifmmp", false, 0, 5},
328    // Two invalid English words concatenated with a no-break space
329    {L"ifmmp" L"\xA0" L"ifmmp", false, 0, 5},
330    // Two invalid English words concatenated with an ideographic space
331    {L"ifmmp" L"\x3000" L"ifmmp", false, 0, 5},
332    // Two invalid English words concatenated with a Chinese word
333    {L"ifmmp" L"\x4F60\x597D" L"ifmmp", false, 0, 5},
334    // [ROBUSTNESS] Two invalid English words concatenated with a Korean word
335    {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 0, 5},
336    // Two invalid English words concatenated with a Japanese word
337    {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 0, 5},
338    // [ROBUSTNESS] Two invalid English words concatenated with a Hindi word
339    {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp" , false, 0, 5},
340    // [ROBUSTNESS] Two invalid English words concatenated with two Greek words
341    {L"ifmmp" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
342     L"ifmmp", false, 0, 5},
343    // [ROBUSTNESS] Two invalid English words concatenated with a Russian word
344    {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441"
345     L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 0, 5},
346    // [ROBUSTNESS] Two invalid English words concatenated with a contraction
347    // character.
348    {L"ifmmp:ifmmp", false, 0, 11},
349
350    // [REGRESSION] Issue 13432: "Any word of 13 or 14 characters is not
351    // spellcheck" <http://crbug.com/13432>.
352    {L"qwertyuiopasd", false, 0, 13},
353    {L"qwertyuiopasdf", false, 0, 14},
354
355    // [REGRESSION] Issue 128896: "en_US hunspell dictionary includes
356    // acknowledgement but not acknowledgements" <http://crbug.com/128896>
357    {L"acknowledgement", true},
358    {L"acknowledgements", true},
359
360    // Issue 123290: "Spellchecker should treat numbers as word characters"
361    {L"0th", true},
362    {L"1st", true},
363    {L"2nd", true},
364    {L"3rd", true},
365    {L"4th", true},
366    {L"5th", true},
367    {L"6th", true},
368    {L"7th", true},
369    {L"8th", true},
370    {L"9th", true},
371    {L"10th", true},
372    {L"100th", true},
373    {L"1000th", true},
374    {L"25", true},
375    {L"2012", true},
376    {L"100,000,000", true},
377    {L"3.141592653", true},
378
379  };
380
381  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
382    size_t input_length = 0;
383    if (kTestCases[i].input != NULL) {
384      input_length = wcslen(kTestCases[i].input);
385    }
386    int misspelling_start;
387    int misspelling_length;
388    bool result = spell_check()->SpellCheckWord(
389        base::WideToUTF16(kTestCases[i].input).c_str(),
390        static_cast<int>(input_length),
391        0,
392        &misspelling_start,
393        &misspelling_length, NULL);
394
395    EXPECT_EQ(kTestCases[i].expected_result, result);
396    EXPECT_EQ(kTestCases[i].misspelling_start, misspelling_start);
397    EXPECT_EQ(kTestCases[i].misspelling_length, misspelling_length);
398  }
399}
400
401TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) {
402  static const struct {
403    // A string to be tested.
404    const wchar_t* input;
405    // An expected result for this test case.
406    //   * true: the input string does not have any invalid words.
407    //   * false: the input string has one or more invalid words.
408    bool expected_result;
409    // The position and the length of the first invalid word.
410    int misspelling_start;
411    int misspelling_length;
412
413    // A suggested word that should occur.
414    const wchar_t* suggested_word;
415  } kTestCases[] = {
416    {L"ello", false, 0, 0, L"hello"},
417    {L"ello", false, 0, 0, L"cello"},
418    {L"wate", false, 0, 0, L"water"},
419    {L"wate", false, 0, 0, L"waste"},
420    {L"wate", false, 0, 0, L"sate"},
421    {L"wate", false, 0, 0, L"ate"},
422    {L"jum", false, 0, 0, L"jump"},
423    {L"jum", false, 0, 0, L"hum"},
424    {L"jum", false, 0, 0, L"sum"},
425    {L"jum", false, 0, 0, L"um"},
426    // A regression test for Issue 36523.
427    {L"privliged", false, 0, 0, L"privileged"},
428    // TODO (Sidchat): add many more examples.
429  };
430
431  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
432    std::vector<base::string16> suggestions;
433    size_t input_length = 0;
434    if (kTestCases[i].input != NULL) {
435      input_length = wcslen(kTestCases[i].input);
436    }
437    int misspelling_start;
438    int misspelling_length;
439    bool result = spell_check()->SpellCheckWord(
440        base::WideToUTF16(kTestCases[i].input).c_str(),
441        static_cast<int>(input_length),
442        0,
443        &misspelling_start,
444        &misspelling_length,
445        &suggestions);
446
447    // Check for spelling.
448    EXPECT_EQ(kTestCases[i].expected_result, result);
449
450    // Check if the suggested words occur.
451    bool suggested_word_is_present = false;
452    for (int j = 0; j < static_cast<int>(suggestions.size()); j++) {
453      if (suggestions.at(j).compare(
454              base::WideToUTF16(kTestCases[i].suggested_word)) == 0) {
455        suggested_word_is_present = true;
456        break;
457      }
458    }
459
460    EXPECT_TRUE(suggested_word_is_present);
461  }
462}
463
464// This test verifies our spellchecker can split a text into words and check
465// the spelling of each word in the text.
466#if defined(THREAD_SANITIZER)
467// SpellCheckTest.SpellCheckText fails under ThreadSanitizer v2.
468// See http://crbug.com/217909.
469#define MAYBE_SpellCheckText DISABLED_SpellCheckText
470#else
471#define MAYBE_SpellCheckText SpellCheckText
472#endif  // THREAD_SANITIZER
473TEST_F(SpellCheckTest, MAYBE_SpellCheckText) {
474  static const struct {
475    const char* language;
476    const wchar_t* input;
477  } kTestCases[] = {
478    {
479      // Afrikaans
480      "af-ZA",
481      L"Google se missie is om die w\x00EAreld se inligting te organiseer en "
482      L"dit bruikbaar en toeganklik te maak."
483    }, {
484      // Catalan
485      "ca-ES",
486      L"La missi\x00F3 de Google \x00E9s organitzar la informaci\x00F3 "
487      L"del m\x00F3n i fer que sigui \x00FAtil i accessible universalment."
488    }, {
489      // Czech
490      "cs-CZ",
491      L"Posl\x00E1n\x00EDm spole\x010Dnosti Google je "
492      L"uspo\x0159\x00E1\x0064\x0061t informace z cel\x00E9ho sv\x011Bta "
493      L"tak, aby byly v\x0161\x0065obecn\x011B p\x0159\x00EDstupn\x00E9 "
494      L"a u\x017Eite\x010Dn\x00E9."
495    }, {
496      // Danish
497      "da-DK",
498      L"Googles "
499      L"mission er at organisere verdens information og g\x00F8re den "
500      L"almindeligt tilg\x00E6ngelig og nyttig."
501    }, {
502      // German
503      "de-DE",
504      L"Das Ziel von Google besteht darin, die auf der Welt vorhandenen "
505      L"Informationen zu organisieren und allgemein zug\x00E4nglich und "
506      L"nutzbar zu machen."
507    }, {
508      // Greek
509      "el-GR",
510      L"\x0391\x03C0\x03BF\x03C3\x03C4\x03BF\x03BB\x03AE "
511      L"\x03C4\x03B7\x03C2 Google \x03B5\x03AF\x03BD\x03B1\x03B9 "
512      L"\x03BD\x03B1 \x03BF\x03C1\x03B3\x03B1\x03BD\x03CE\x03BD\x03B5\x03B9 "
513      L"\x03C4\x03B9\x03C2 "
514      L"\x03C0\x03BB\x03B7\x03C1\x03BF\x03C6\x03BF\x03C1\x03AF\x03B5\x03C2 "
515      L"\x03C4\x03BF\x03C5 \x03BA\x03CC\x03C3\x03BC\x03BF\x03C5 "
516      L"\x03BA\x03B1\x03B9 \x03BD\x03B1 \x03C4\x03B9\x03C2 "
517      L"\x03BA\x03B1\x03B8\x03B9\x03C3\x03C4\x03AC "
518      L"\x03C0\x03C1\x03BF\x03C3\x03B2\x03AC\x03C3\x03B9\x03BC\x03B5\x03C2 "
519      L"\x03BA\x03B1\x03B9 \x03C7\x03C1\x03AE\x03C3\x03B9\x03BC\x03B5\x03C2."
520    }, {
521      // English (Australia)
522      "en-AU",
523      L"Google's mission is to organise the world's information and make it "
524      L"universally accessible and useful."
525    }, {
526      // English (Canada)
527      "en-CA",
528      L"Google's mission is to organize the world's information and make it "
529      L"universally accessible and useful."
530    }, {
531      // English (United Kingdom)
532      "en-GB",
533      L"Google's mission is to organise the world's information and make it "
534      L"universally accessible and useful."
535    }, {
536      // English (United States)
537      "en-US",
538      L"Google's mission is to organize the world's information and make it "
539      L"universally accessible and useful."
540    }, {
541      // Bulgarian
542      "bg-BG",
543      L"\x041c\x0438\x0441\x0438\x044f\x0442\x0430 "
544      L"\x043d\x0430 Google \x0435 \x0434\x0430 \x043e"
545      L"\x0440\x0433\x0430\x043d\x0438\x0437\x0438\x0440"
546      L"\x0430 \x0441\x0432\x0435\x0442\x043e\x0432"
547      L"\x043d\x0430\x0442\x0430 \x0438\x043d\x0444"
548      L"\x043e\x0440\x043c\x0430\x0446\x0438\x044f "
549      L"\x0438 \x0434\x0430 \x044f \x043d"
550      L"\x0430\x043f\x0440\x0430\x0432\x0438 \x0443"
551      L"\x043d\x0438\x0432\x0435\x0440\x0441\x0430\x043b"
552      L"\x043d\x043e \x0434\x043e\x0441\x0442\x044a"
553      L"\x043f\x043d\x0430 \x0438 \x043f\x043e"
554      L"\x043b\x0435\x0437\x043d\x0430."
555    }, {
556      // Spanish
557      "es-ES",
558      L"La misi\x00F3n de "
559      // L"Google" - to be added.
560      L" es organizar la informaci\x00F3n mundial "
561      L"para que resulte universalmente accesible y \x00FAtil."
562    }, {
563      // Estonian
564      "et-EE",
565      // L"Google'ile " - to be added.
566      L"\x00FClesanne on korraldada maailma teavet ja teeb selle "
567      L"k\x00F5igile k\x00E4ttesaadavaks ja kasulikuks.",
568    }, {
569      // Faroese
570      "fo-FO",
571      L"Google er at samskipa alla vitan \x00ED heiminum og gera hana alment "
572      L"atkomiliga og n\x00FDtiliga."
573    }, {
574      // French
575      "fr-FR",
576      L"Google a pour mission d'organiser les informations \x00E0 "
577      L"l'\x00E9\x0063helle mondiale dans le but de les rendre accessibles "
578      L"et utiles \x00E0 tous."
579    }, {
580      // Hebrew
581      "he-IL",
582      L"\x05D4\x05DE\x05E9\x05D9\x05DE\x05D4 \x05E9\x05DC Google "
583      L"\x05D4\x05D9\x05D0 \x05DC\x05D0\x05E8\x05D2\x05DF "
584      L"\x05D0\x05EA \x05D4\x05DE\x05D9\x05D3\x05E2 "
585      L"\x05D4\x05E2\x05D5\x05DC\x05DE\x05D9 "
586      L"\x05D5\x05DC\x05D4\x05E4\x05D5\x05DA \x05D0\x05D5\x05EA\x05D5 "
587      L"\x05DC\x05D6\x05DE\x05D9\x05DF "
588      L"\x05D5\x05E9\x05D9\x05DE\x05D5\x05E9\x05D9 \x05D1\x05DB\x05DC "
589      L"\x05D4\x05E2\x05D5\x05DC\x05DD. "
      // Two words with ASCII double/single quotation marks.
591      L"\x05DE\x05E0\x05DB\x0022\x05DC \x05E6\x0027\x05D9\x05E4\x05E1"
592    }, {
593      // Hindi
594      "hi-IN",
595      L"Google \x0915\x093E \x092E\x093F\x0936\x0928 "
596      L"\x0926\x0941\x0928\x093F\x092F\x093E \x0915\x0940 "
597      L"\x091C\x093E\x0928\x0915\x093E\x0930\x0940 \x0915\x094B "
598      L"\x0935\x094D\x092F\x0935\x0938\x094D\x0925\x093F\x0924 "
599      L"\x0915\x0930\x0928\x093E \x0914\x0930 \x0909\x0938\x0947 "
600      L"\x0938\x093E\x0930\x094D\x0935\x092D\x094C\x092E\x093F\x0915 "
601      L"\x0930\x0942\x092A \x0938\x0947 \x092A\x0939\x0941\x0901\x091A "
602      L"\x092E\x0947\x0902 \x0914\x0930 \x0909\x092A\x092F\x094B\x0917\x0940 "
603      L"\x092C\x0928\x093E\x0928\x093E \x0939\x0948."
604    }, {
605      // Hungarian
606      "hu-HU",
607      L"A Google azt a k\x00FCldet\x00E9st v\x00E1llalta mag\x00E1ra, "
608      L"hogy a vil\x00E1gon fellelhet\x0151 inform\x00E1\x0063i\x00F3kat "
609      L"rendszerezze \x00E9s \x00E1ltal\x00E1nosan el\x00E9rhet\x0151v\x00E9, "
610      L"illetve haszn\x00E1lhat\x00F3v\x00E1 tegye."
611    }, {
612      // Croatian
613      "hr-HR",
614      // L"Googleova " - to be added.
615      L"je misija organizirati svjetske informacije i u\x010Diniti ih "
616      // L"univerzalno " - to be added.
617      L"pristupa\x010Dnima i korisnima."
618    }, {
619      // Indonesian
620      "id-ID",
621      L"Misi Google adalah untuk mengelola informasi dunia dan membuatnya "
622      L"dapat diakses dan bermanfaat secara universal."
623    }, {
624      // Italian
625      "it-IT",
626      L"La missione di Google \x00E8 organizzare le informazioni a livello "
627      L"mondiale e renderle universalmente accessibili e fruibili."
628    }, {
629      // Lithuanian
630      "lt-LT",
631      L"\x201EGoogle\x201C tikslas \x2013 rinkti ir sisteminti pasaulio "
632      L"informacij\x0105 bei padaryti j\x0105 prieinam\x0105 ir "
633      L"nauding\x0105 visiems."
634    }, {
635      // Latvian
636      "lv-LV",
637      L"Google uzdevums ir k\x0101rtot pasaules inform\x0101"
638      L"ciju un padar\x012Bt to univers\x0101li pieejamu un noder\x012Bgu."
639    }, {
640      // Norwegian
641      "nb-NO",
642      // L"Googles " - to be added.
643      L"m\x00E5l er \x00E5 organisere informasjonen i verden og "
644      L"gj\x00F8re den tilgjengelig og nyttig for alle."
645    }, {
646      // Dutch
647      "nl-NL",
648      L"Het doel van Google is om alle informatie wereldwijd toegankelijk "
649      L"en bruikbaar te maken."
650    }, {
651      // Polish
652      "pl-PL",
653      L"Misj\x0105 Google jest uporz\x0105" L"dkowanie \x015Bwiatowych "
654      L"zasob\x00F3w informacji, aby sta\x0142y si\x0119 one powszechnie "
655      L"dost\x0119pne i u\x017Cyteczne."
656    }, {
657      // Portuguese (Brazil)
658      "pt-BR",
659      L"A miss\x00E3o do "
660#if !defined(OS_MACOSX)
661      L"Google "
662#endif
663      L"\x00E9 organizar as informa\x00E7\x00F5"
664      L"es do mundo todo e "
665#if !defined(OS_MACOSX)
666      L"torn\x00E1-las "
667#endif
668      L"acess\x00EDveis e \x00FAteis em car\x00E1ter universal."
669    }, {
670      // Portuguese (Portugal)
671      "pt-PT",
672      L"O "
673#if !defined(OS_MACOSX)
674      L"Google "
675#endif
676      L"tem por miss\x00E3o organizar a informa\x00E7\x00E3o do "
677      L"mundo e "
678#if !defined(OS_MACOSX)
679      L"torn\x00E1-la "
680#endif
681      L"universalmente acess\x00EDvel e \x00FAtil"
682    }, {
683      // Romanian
684      "ro-RO",
685      L"Misiunea Google este de a organiza informa\x021B3iile lumii \x0219i de "
686      L"a le face accesibile \x0219i utile la nivel universal."
687    }, {
688      // Russian
689      "ru-RU",
690      L"\x041C\x0438\x0441\x0441\x0438\x044F Google "
691      L"\x0441\x043E\x0441\x0442\x043E\x0438\x0442 \x0432 "
692      L"\x043E\x0440\x0433\x0430\x043D\x0438\x0437\x0430\x0446\x0438\x0438 "
693      L"\x043C\x0438\x0440\x043E\x0432\x043E\x0439 "
694      L"\x0438\x043D\x0444\x043E\x0440\x043C\x0430\x0446\x0438\x0438, "
695      L"\x043E\x0431\x0435\x0441\x043F\x0435\x0447\x0435\x043D\x0438\x0438 "
696      L"\x0435\x0435 "
697      L"\x0434\x043E\x0441\x0442\x0443\x043F\x043D\x043E\x0441\x0442\x0438 "
698      L"\x0438 \x043F\x043E\x043B\x044C\x0437\x044B \x0434\x043B\x044F "
699      L"\x0432\x0441\x0435\x0445."
700      // A Russian word including U+0451. (Bug 15558 <http://crbug.com/15558>)
701      L"\x0451\x043B\x043A\x0430"
702    }, {
703      // Serbo-Croatian (Serbian Latin)
704      "sh",
705      L"Google-ova misija je da organizuje sve informacije na svetu i "
706      L"u\x010dini ih univerzal-no dostupnim i korisnim."
707    }, {
708      // Serbian
709      "sr",
710      L"\x0047\x006f\x006f\x0067\x006c\x0065\x002d\x043e\x0432\x0430 "
711      L"\x043c\x0438\x0441\x0438\x0458\x0430 \x0458\x0435 \x0434\x0430 "
712      L"\x043e\x0440\x0433\x0430\x043d\x0438\x0437\x0443\x0458\x0435 "
713      L"\x0441\x0432\x0435 "
714      L"\x0438\x043d\x0444\x043e\x0440\x043c\x0430\x0446\x0438\x0458\x0435 "
715      L"\x043d\x0430 \x0441\x0432\x0435\x0442\x0443 \x0438 "
716      L"\x0443\x0447\x0438\x043d\x0438 \x0438\x0445 "
717      L"\x0443\x043d\x0438\x0432\x0435\x0440\x0437\x0430\x043b\x043d\x043e "
718      L"\x0434\x043e\x0441\x0442\x0443\x043f\x043d\x0438\x043c \x0438 "
719      L"\x043a\x043e\x0440\x0438\x0441\x043d\x0438\x043c."
720    }, {
721      // Slovak
722      "sk-SK",
723      L"Spolo\x010Dnos\x0165 Google si dala za \x00FAlohu usporiada\x0165 "
724      L"inform\x00E1\x0063ie "
725      L"z cel\x00E9ho sveta a zabezpe\x010Di\x0165, "
726      L"aby boli v\x0161eobecne dostupn\x00E9 a u\x017Eito\x010Dn\x00E9."
727    }, {
728      // Slovenian
729      "sl-SI",
730      // L"Googlovo " - to be added.
731      L"poslanstvo je organizirati svetovne informacije in "
732      L"omogo\x010Diti njihovo dostopnost in s tem uporabnost za vse."
733    }, {
734      // Swedish
735      "sv-SE",
736      L"Googles m\x00E5ls\x00E4ttning \x00E4r att ordna v\x00E4rldens "
737      L"samlade information och g\x00F6ra den tillg\x00E4nglig f\x00F6r alla."
738    }, {
739      // Turkish
740      "tr-TR",
741      // L"Google\x2019\x0131n " - to be added.
742      L"misyonu, d\x00FCnyadaki t\x00FCm bilgileri "
743      L"organize etmek ve evrensel olarak eri\x015Filebilir ve "
744      L"kullan\x0131\x015Fl\x0131 k\x0131lmakt\x0131r."
745    }, {
      // Ukrainian
747      "uk-UA",
748      L"\x041c\x0456\x0441\x0456\x044f "
749      L"\x043a\x043e\x043c\x043f\x0430\x043d\x0456\x0457 Google "
750      L"\x043f\x043e\x043b\x044f\x0433\x0430\x0454 \x0432 "
751      L"\x0442\x043e\x043c\x0443, \x0449\x043e\x0431 "
752      L"\x0443\x043f\x043e\x0440\x044f\x0434\x043a\x0443\x0432\x0430\x0442"
753      L"\x0438 \x0456\x043d\x0444\x043e\x0440\x043c\x0430\x0446\x0456\x044e "
754      L"\x0437 \x0443\x0441\x044c\x043e\x0433\x043e "
755      L"\x0441\x0432\x0456\x0442\x0443 \x0442\x0430 "
756      L"\x0437\x0440\x043e\x0431\x0438\x0442\x0438 \x0457\x0457 "
757      L"\x0443\x043d\x0456\x0432\x0435\x0440\x0441\x0430\x043b\x044c\x043d"
758      L"\x043e \x0434\x043e\x0441\x0442\x0443\x043f\x043d\x043e\x044e "
759      L"\x0442\x0430 \x043a\x043e\x0440\x0438\x0441\x043d\x043e\x044e."
760    }, {
761      // Vietnamese
762      "vi-VN",
763      L"Nhi\x1EC7m v\x1EE5 c\x1EE7\x0061 "
764      L"Google la \x0111\x1EC3 t\x1ED5 ch\x1EE9\x0063 "
765      L"c\x00E1\x0063 th\x00F4ng tin c\x1EE7\x0061 "
766      L"th\x1EBF gi\x1EDBi va l\x00E0m cho n\x00F3 universal c\x00F3 "
767      L"th\x1EC3 truy c\x1EADp va h\x1EEFu d\x1EE5ng h\x01A1n."
768    }, {
769      // Korean
770      "ko",
771      L"Google\xC758 \xBAA9\xD45C\xB294 \xC804\xC138\xACC4\xC758 "
772      L"\xC815\xBCF4\xB97C \xCCB4\xACC4\xD654\xD558\xC5EC \xBAA8\xB450\xAC00 "
773      L"\xD3B8\xB9AC\xD558\xAC8C \xC774\xC6A9\xD560 \xC218 "
774      L"\xC788\xB3C4\xB85D \xD558\xB294 \xAC83\xC785\xB2C8\xB2E4."
775    }, {
776      // Albanian
777      "sq",
778      L"Misioni i Google \x00EBsht\x00EB q\x00EB t\x00EB organizoj\x00EB "
779      L"informacionin e bot\x00EBs dhe t\x00EB b\x00EBjn\x00EB at\x00EB "
780      L"universalisht t\x00EB arritshme dhe t\x00EB dobishme."
781    }, {
782      // Tamil
783      "ta",
784      L"Google \x0B87\x0BA9\x0BCD "
785      L"\x0BA8\x0BC7\x0BBE\x0B95\x0BCD\x0B95\x0BAE\x0BCD "
786      L"\x0B89\x0BB2\x0B95\x0BBF\x0BA9\x0BCD \x0BA4\x0B95\x0BB5\x0BB2\x0BCD "
787      L"\x0B8F\x0BB1\x0BCD\x0BAA\x0BBE\x0B9F\x0BC1 \x0B87\x0BA4\x0BC1 "
788      L"\x0B89\x0BB2\x0B95\x0BB3\x0BBE\x0BB5\x0BBF\x0BAF "
789      L"\x0B85\x0BA3\x0BC1\x0B95\x0B95\x0BCD \x0B95\x0BC2\x0B9F\x0BBF\x0BAF "
790      L"\x0BAE\x0BB1\x0BCD\x0BB1\x0BC1\x0BAE\x0BCD "
791      L"\x0BAA\x0BAF\x0BA9\x0BC1\x0BB3\x0BCD\x0BB3 "
792      L"\x0B9A\x0BC6\x0BAF\x0BCD\x0BAF \x0B89\x0BB3\x0BCD\x0BB3\x0BA4\x0BC1."
793    }, {
794      // Tajik
795      "tg",
796      L"\x041c\x0438\x0441\x0441\x0438\x044f\x0438 Google \x0438\x043d "
797      L"\x043c\x0443\x0440\x0430\x0442\x0442\x0430\x0431 "
798      L"\x0441\x043e\x0445\x0442\x0430\x043d\x0438 "
799      L"\x043c\x0430\x044a\x043b\x0443\x043c\x043e\x0442\x04b3\x043e\x0438 "
800      L"\x043c\x0430\x0432\x04b7\x0443\x0434\x0430, \x043e\x0441\x043e\x043d "
801      L"\x043d\x0430\x043c\x0443\x0434\x0430\x043d\x0438 "
802      L"\x0438\x0441\x0442\x0438\x0444\x043e\x0434\x0430\x0431\x0430\x0440"
803      L"\x04e3 \x0432\x0430 \x0434\x0430\x0441\x0442\x0440\x0430\x0441\x0438 "
804      L"\x0443\x043c\x0443\x043c "
805      L"\x0433\x0430\x0440\x0434\x043e\x043d\x0438\x0434\x0430\x043d\x0438 "
806      L"\x043e\x043d\x04b3\x043e \x0430\x0441\x0442."
807    },
808  };
809
810  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
811    ReinitializeSpellCheck(kTestCases[i].language);
812    size_t input_length = 0;
813    if (kTestCases[i].input != NULL)
814      input_length = wcslen(kTestCases[i].input);
815
816    int misspelling_start = 0;
817    int misspelling_length = 0;
818    bool result = spell_check()->SpellCheckWord(
819        base::WideToUTF16(kTestCases[i].input).c_str(),
820        static_cast<int>(input_length),
821        0,
822        &misspelling_start,
823        &misspelling_length, NULL);
824
825    EXPECT_TRUE(result)
826        << "\""
827        << std::wstring(kTestCases[i].input).substr(
828               misspelling_start, misspelling_length)
829        << "\" is misspelled in "
830        << kTestCases[i].language
831        << ".";
832    EXPECT_EQ(0, misspelling_start);
833    EXPECT_EQ(0, misspelling_length);
834  }
835}
836
837TEST_F(SpellCheckTest, GetAutoCorrectionWord_EN_US) {
838  static const struct {
839    // A misspelled word.
840    const char* input;
841
842    // An expected result for this test case.
843    // Should be an empty string if there are no suggestions for auto correct.
844    const char* expected_result;
845  } kTestCases[] = {
846    {"teh", "the"},
847    {"moer", "more"},
848    {"watre", "water"},
849    {"noen", ""},
850    {"what", ""},
851  };
852
853  EnableAutoCorrect(true);
854
855  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
856    base::string16 misspelled_word(base::UTF8ToUTF16(kTestCases[i].input));
857    base::string16 expected_autocorrect_word(
858        base::UTF8ToUTF16(kTestCases[i].expected_result));
859    base::string16 autocorrect_word = spell_check()->GetAutoCorrectionWord(
860        misspelled_word, 0);
861
862    // Check for spelling.
863    EXPECT_EQ(expected_autocorrect_word, autocorrect_word);
864  }
865}
866
867// Verify that our SpellCheck::SpellCheckWord() returns false when it checks
868// misspelled words.
869TEST_F(SpellCheckTest, MisspelledWords) {
870  static const struct {
871    const char* language;
872    const wchar_t* input;
873  } kTestCases[] = {
874    {
875      // A misspelled word for English
876      "en-US",
877      L"aaaaaaaaaa",
878    }, {
879      // A misspelled word for Greek.
880      "el-GR",
881      L"\x03B1\x03B1\x03B1\x03B1\x03B1\x03B1\x03B1\x03B1\x03B1\x03B1",
882    }, {
883      // A misspelled word for Hebrew
884      "he-IL",
885      L"\x05D0\x05D0\x05D0\x05D0\x05D0\x05D0\x05D0\x05D0\x05D0\x05D0",
886    }, {
887      // Hindi
888      "hi-IN",
889      L"\x0905\x0905\x0905\x0905\x0905\x0905\x0905\x0905\x0905\x0905",
890    }, {
891      // A misspelled word for Russian
892      "ru-RU",
893      L"\x0430\x0430\x0430\x0430\x0430\x0430\x0430\x0430\x0430\x0430",
894    },
895  };
896
897  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
898    ReinitializeSpellCheck(kTestCases[i].language);
899
900    base::string16 word(base::WideToUTF16(kTestCases[i].input));
901    int word_length = static_cast<int>(word.length());
902    int misspelling_start = 0;
903    int misspelling_length = 0;
904    bool result = spell_check()->SpellCheckWord(word.c_str(),
905                                                word_length,
906                                                0,
907                                                &misspelling_start,
908                                                &misspelling_length,
909                                                NULL);
910    EXPECT_FALSE(result);
911    EXPECT_EQ(0, misspelling_start);
912    EXPECT_EQ(word_length, misspelling_length);
913  }
914}
915
916// Since SpellCheck::SpellCheckParagraph is not implemented on Mac,
917// we skip these SpellCheckParagraph tests on Mac.
918#if !defined(OS_MACOSX)
919
920// Make sure SpellCheckParagraph does not crash if the input is empty.
921TEST_F(SpellCheckTest, SpellCheckParagraphEmptyParagraph) {
922  std::vector<SpellCheckResult> expected;
923  TestSpellCheckParagraph(base::UTF8ToUTF16(""), expected);
924}
925
926// A simple test case having no misspellings.
927TEST_F(SpellCheckTest, SpellCheckParagraphNoMisspellings) {
928  const base::string16 text = base::UTF8ToUTF16("apple");
929  std::vector<SpellCheckResult> expected;
930  TestSpellCheckParagraph(text, expected);
931}
932
933// A simple test case having one misspelling.
934TEST_F(SpellCheckTest, SpellCheckParagraphSingleMisspellings) {
935  const base::string16 text = base::UTF8ToUTF16("zz");
936  std::vector<SpellCheckResult> expected;
937  expected.push_back(SpellCheckResult(
938      SpellCheckResult::SPELLING, 0, 2));
939
940  TestSpellCheckParagraph(text, expected);
941}
942
943// A simple test case having multiple misspellings.
944TEST_F(SpellCheckTest, SpellCheckParagraphMultipleMisspellings) {
945  const base::string16 text = base::UTF8ToUTF16("zz, zz");
946  std::vector<SpellCheckResult> expected;
947  expected.push_back(SpellCheckResult(
948      SpellCheckResult::SPELLING, 0, 2));
949  expected.push_back(SpellCheckResult(
950      SpellCheckResult::SPELLING, 4, 2));
951
952  TestSpellCheckParagraph(text, expected);
953}
954
955// Make sure a relatively long (correct) sentence can be spellchecked.
956TEST_F(SpellCheckTest, SpellCheckParagraphLongSentence) {
957  std::vector<SpellCheckResult> expected;
958  // The text is taken from US constitution preamble.
959  const base::string16 text = base::UTF8ToUTF16(
960      "We the people of the United States, in order to form a more perfect "
961      "union, establish justice, insure domestic tranquility, provide for "
962      "the common defense, promote the general welfare, and secure the "
963      "blessings of liberty to ourselves and our posterity, do ordain and "
964      "establish this Constitution for the United States of America.");
965
966  TestSpellCheckParagraph(text, expected);
967}
968
969// Make sure all misspellings can be found in a relatively long sentence.
970TEST_F(SpellCheckTest, SpellCheckParagraphLongSentenceMultipleMisspellings) {
971  std::vector<SpellCheckResult> expected;
972
973  // All 'the' are converted to 'hte' in US consitition preamble.
974  const base::string16 text = base::UTF8ToUTF16(
975      "We hte people of hte United States, in order to form a more perfect "
976      "union, establish justice, insure domestic tranquility, provide for "
977      "hte common defense, promote hte general welfare, and secure hte "
978      "blessings of liberty to ourselves and our posterity, do ordain and "
979      "establish this Constitution for hte United States of America.");
980
981  expected.push_back(SpellCheckResult(
982      SpellCheckResult::SPELLING, 3, 3));
983  expected.push_back(SpellCheckResult(
984      SpellCheckResult::SPELLING, 17, 3));
985  expected.push_back(SpellCheckResult(
986      SpellCheckResult::SPELLING, 135, 3));
987  expected.push_back(SpellCheckResult(
988      SpellCheckResult::SPELLING, 163, 3));
989  expected.push_back(SpellCheckResult(
990      SpellCheckResult::SPELLING, 195, 3));
991  expected.push_back(SpellCheckResult(
992      SpellCheckResult::SPELLING, 298, 3));
993
994  TestSpellCheckParagraph(text, expected);
995}
996
997// We also skip RequestSpellCheck tests on Mac, because a system spellchecker
998// is used on Mac instead of SpellCheck::RequestTextChecking.
999
1000// Make sure RequestTextChecking does not crash if input is empty.
1001TEST_F(SpellCheckTest, RequestSpellCheckWithEmptyString) {
1002  MockTextCheckingCompletion completion;
1003
1004  spell_check()->RequestTextChecking(base::string16(), &completion);
1005
1006  base::MessageLoop::current()->RunUntilIdle();
1007
1008  EXPECT_EQ(completion.completion_count_, 1U);
1009}
1010
1011// A simple test case having no misspellings.
1012TEST_F(SpellCheckTest, RequestSpellCheckWithoutMisspelling) {
1013  MockTextCheckingCompletion completion;
1014
1015  const base::string16 text = base::ASCIIToUTF16("hello");
1016  spell_check()->RequestTextChecking(text, &completion);
1017
1018  base::MessageLoop::current()->RunUntilIdle();
1019
1020  EXPECT_EQ(completion.completion_count_, 1U);
1021}
1022
1023// A simple test case having one misspelling.
1024TEST_F(SpellCheckTest, RequestSpellCheckWithSingleMisspelling) {
1025  MockTextCheckingCompletion completion;
1026
1027  const base::string16 text = base::ASCIIToUTF16("apple, zz");
1028  spell_check()->RequestTextChecking(text, &completion);
1029
1030  base::MessageLoop::current()->RunUntilIdle();
1031
1032  EXPECT_EQ(completion.completion_count_, 1U);
1033  EXPECT_EQ(completion.last_results_.size(), 1U);
1034  EXPECT_EQ(completion.last_results_[0].location, 7);
1035  EXPECT_EQ(completion.last_results_[0].length, 2);
1036}
1037
1038// A simple test case having a few misspellings.
1039TEST_F(SpellCheckTest, RequestSpellCheckWithMisspellings) {
1040  MockTextCheckingCompletion completion;
1041
1042  const base::string16 text = base::ASCIIToUTF16("apple, zz, orange, zz");
1043  spell_check()->RequestTextChecking(text, &completion);
1044
1045  base::MessageLoop::current()->RunUntilIdle();
1046
1047  EXPECT_EQ(completion.completion_count_, 1U);
1048  EXPECT_EQ(completion.last_results_.size(), 2U);
1049  EXPECT_EQ(completion.last_results_[0].location, 7);
1050  EXPECT_EQ(completion.last_results_[0].length, 2);
1051  EXPECT_EQ(completion.last_results_[1].location, 19);
1052  EXPECT_EQ(completion.last_results_[1].length, 2);
1053}
1054
1055// A test case that multiple requests comes at once. Make sure all
1056// requests are processed.
1057TEST_F(SpellCheckTest, RequestSpellCheckWithMultipleRequests) {
1058  MockTextCheckingCompletion completion[3];
1059
1060  const base::string16 text[3] = {
1061    base::ASCIIToUTF16("what, zz"),
1062    base::ASCIIToUTF16("apple, zz"),
1063    base::ASCIIToUTF16("orange, zz")
1064  };
1065
1066  for (int i = 0; i < 3; ++i)
1067    spell_check()->RequestTextChecking(text[i], &completion[i]);
1068
1069  base::MessageLoop::current()->RunUntilIdle();
1070
1071  for (int i = 0; i < 3; ++i) {
1072    EXPECT_EQ(completion[i].completion_count_, 1U);
1073    EXPECT_EQ(completion[i].last_results_.size(), 1U);
1074    EXPECT_EQ(completion[i].last_results_[0].location, 6 + i);
1075    EXPECT_EQ(completion[i].last_results_[0].length, 2);
1076  }
1077}
1078
1079// A test case that spellchecking is requested before initializing.
1080// In this case, we postpone to post a request.
1081TEST_F(SpellCheckTest, RequestSpellCheckWithoutInitialization) {
1082  UninitializeSpellCheck();
1083
1084  MockTextCheckingCompletion completion;
1085  const base::string16 text = base::ASCIIToUTF16("zz");
1086
1087  spell_check()->RequestTextChecking(text, &completion);
1088
1089  // The task will not be posted yet.
1090  base::MessageLoop::current()->RunUntilIdle();
1091  EXPECT_EQ(completion.completion_count_, 0U);
1092}
1093
1094// Requests several spellchecking before initializing. Except the last one,
1095// posting requests is cancelled and text is rendered as correct one.
1096TEST_F(SpellCheckTest, RequestSpellCheckMultipleTimesWithoutInitialization) {
1097  UninitializeSpellCheck();
1098
1099  MockTextCheckingCompletion completion[3];
1100  const base::string16 text[3] = {
1101    base::ASCIIToUTF16("what, zz"),
1102    base::ASCIIToUTF16("apple, zz"),
1103    base::ASCIIToUTF16("orange, zz")
1104  };
1105
1106  // Calls RequestTextchecking a few times.
1107  for (int i = 0; i < 3; ++i)
1108    spell_check()->RequestTextChecking(text[i], &completion[i]);
1109
1110  // The last task will be posted after initialization, however the other
1111  // requests should be pressed without spellchecking.
1112  base::MessageLoop::current()->RunUntilIdle();
1113  for (int i = 0; i < 2; ++i)
1114    EXPECT_EQ(completion[i].completion_count_, 1U);
1115  EXPECT_EQ(completion[2].completion_count_, 0U);
1116
1117  // Checks the last request is processed after initialization.
1118  InitializeSpellCheck("en-US");
1119
1120  // Calls PostDelayedSpellCheckTask instead of OnInit here for simplicity.
1121  spell_check()->PostDelayedSpellCheckTask(
1122      spell_check()->pending_request_param_.release());
1123  base::MessageLoop::current()->RunUntilIdle();
1124  for (int i = 0; i < 3; ++i)
1125    EXPECT_EQ(completion[i].completion_count_, 1U);
1126}
1127
1128TEST_F(SpellCheckTest, CreateTextCheckingResults) {
1129  // Verify that the SpellCheck class keeps the spelling marker added to a
1130  // misspelled word "zz".
1131  {
1132    base::string16 text = base::ASCIIToUTF16("zz");
1133    std::vector<SpellCheckResult> spellcheck_results;
1134    spellcheck_results.push_back(SpellCheckResult(
1135        SpellCheckResult::SPELLING, 0, 2, base::string16()));
1136    blink::WebVector<blink::WebTextCheckingResult> textcheck_results;
1137    spell_check()->CreateTextCheckingResults(SpellCheck::USE_NATIVE_CHECKER,
1138                                             0,
1139                                             text,
1140                                             spellcheck_results,
1141                                             &textcheck_results);
1142    EXPECT_EQ(spellcheck_results.size(), textcheck_results.size());
1143    EXPECT_EQ(blink::WebTextDecorationTypeSpelling,
1144              textcheck_results[0].decoration);
1145    EXPECT_EQ(spellcheck_results[0].location, textcheck_results[0].location);
1146    EXPECT_EQ(spellcheck_results[0].length, textcheck_results[0].length);
1147  }
1148
1149  // Verify that the SpellCheck class replaces the spelling marker added to a
1150  // contextually-misspelled word "bean" with a grammar marker.
1151  {
1152    base::string16 text = base::ASCIIToUTF16("I have bean to USA.");
1153    std::vector<SpellCheckResult> spellcheck_results;
1154    spellcheck_results.push_back(SpellCheckResult(
1155        SpellCheckResult::SPELLING, 7, 4, base::string16()));
1156    blink::WebVector<blink::WebTextCheckingResult> textcheck_results;
1157    spell_check()->CreateTextCheckingResults(SpellCheck::USE_NATIVE_CHECKER,
1158                                             0,
1159                                             text,
1160                                             spellcheck_results,
1161                                             &textcheck_results);
1162    EXPECT_EQ(spellcheck_results.size(), textcheck_results.size());
1163    EXPECT_EQ(blink::WebTextDecorationTypeGrammar,
1164              textcheck_results[0].decoration);
1165    EXPECT_EQ(spellcheck_results[0].location, textcheck_results[0].location);
1166    EXPECT_EQ(spellcheck_results[0].length, textcheck_results[0].length);
1167  }
1168}
1169
1170#endif
1171
1172// Checks some words that should be present in all English dictionaries.
1173TEST_F(SpellCheckTest, EnglishWords) {
1174  static const struct {
1175    const char* input;
1176    bool should_pass;
1177  } kTestCases[] = {
1178    // Issue 146093: "Chromebook" and "Chromebox" not included in spell-checking
1179    // dictionary.
1180    {"Chromebook", true},
1181    {"Chromebooks", true},
1182    {"Chromebox", true},
1183    {"Chromeboxes", true},
1184    {"Chromeblade", true},
1185    {"Chromeblades", true},
1186    {"Chromebase", true},
1187    {"Chromebases", true},
1188    // Issue 94708: Spell-checker incorrectly reports whisky as misspelled.
1189    {"whisky", true},
1190    {"whiskey", true},
1191    {"whiskies", true},
1192    // Issue 98678: "Recency" should be included in client-side dictionary.
1193    {"recency", true},
1194    {"recencies", false},
1195    // Issue 140486
1196    {"movie", true},
1197    {"movies", true},
1198  };
1199
1200  static const char* kLocales[] = { "en-GB", "en-US", "en-CA", "en-AU" };
1201
1202  for (size_t j = 0; j < arraysize(kLocales); ++j) {
1203    ReinitializeSpellCheck(kLocales[j]);
1204    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
1205      size_t input_length = 0;
1206      if (kTestCases[i].input != NULL)
1207        input_length = strlen(kTestCases[i].input);
1208
1209      int misspelling_start = 0;
1210      int misspelling_length = 0;
1211      bool result = spell_check()->SpellCheckWord(
1212          base::ASCIIToUTF16(kTestCases[i].input).c_str(),
1213          static_cast<int>(input_length),
1214          0,
1215          &misspelling_start,
1216          &misspelling_length, NULL);
1217
1218      EXPECT_EQ(kTestCases[i].should_pass, result) << kTestCases[i].input <<
1219          " in " << kLocales[j];
1220    }
1221  }
1222}
1223
1224// Checks that NOSUGGEST works in English dictionaries.
1225TEST_F(SpellCheckTest, NoSuggest) {
1226  static const struct {
1227    const char* input;
1228    const char* suggestion;
1229    const char* locale;
1230    bool should_pass;
1231  } kTestCases[] = {
1232    {"suckerbert", "cocksucker",  "en-GB", true},
1233    {"suckerbert", "cocksucker",  "en-US", true},
1234    {"suckerbert", "cocksucker",  "en-CA", true},
1235    {"suckerbert", "cocksucker",  "en-AU", true},
1236    {"suckerbert", "cocksuckers", "en-GB", true},
1237    {"suckerbert", "cocksuckers", "en-US", true},
1238    {"suckerbert", "cocksuckers", "en-CA", true},
1239    {"suckerbert", "cocksuckers", "en-AU", true},
1240    {"Batasunaa",  "Batasuna",    "ca-ES", true},
1241    {"pornoo",     "porno",       "it-IT", true},
1242    {"catass",     "catas",       "lt-LT", true},
1243    {"kuracc",     "kurac",       "sl-SI", true},
1244    {"pittt",      "pitt",        "sv-SE", true},
1245  };
1246
1247  size_t test_cases_size = ARRAYSIZE_UNSAFE(kTestCases);
1248  for (size_t i = 0; i < test_cases_size; ++i) {
1249    ReinitializeSpellCheck(kTestCases[i].locale);
1250    size_t suggestion_length = 0;
1251    if (kTestCases[i].suggestion != NULL)
1252      suggestion_length = strlen(kTestCases[i].suggestion);
1253
1254    // First check that the NOSUGGEST flag didn't mark this word as not being in
1255    // the dictionary.
1256    int misspelling_start = 0;
1257    int misspelling_length = 0;
1258    bool result = spell_check()->SpellCheckWord(
1259        base::ASCIIToUTF16(kTestCases[i].suggestion).c_str(),
1260        static_cast<int>(suggestion_length),
1261        0,
1262        &misspelling_start,
1263        &misspelling_length, NULL);
1264
1265    EXPECT_EQ(kTestCases[i].should_pass, result) << kTestCases[i].suggestion <<
1266        " in " << kTestCases[i].locale;
1267
1268    // Now verify that this test case does not show up as a suggestion.
1269    std::vector<base::string16> suggestions;
1270    size_t input_length = 0;
1271    if (kTestCases[i].input != NULL)
1272      input_length = strlen(kTestCases[i].input);
1273    result = spell_check()->SpellCheckWord(
1274        base::ASCIIToUTF16(kTestCases[i].input).c_str(),
1275        static_cast<int>(input_length),
1276        0,
1277        &misspelling_start,
1278        &misspelling_length,
1279        &suggestions);
1280    // Input word should be a misspelling.
1281    EXPECT_FALSE(result) << kTestCases[i].input
1282                         << " is not a misspelling in "
1283                         << kTestCases[i].locale;
1284    // Check if the suggested words occur.
1285    for (int j = 0; j < static_cast<int>(suggestions.size()); j++) {
1286      for (size_t t = 0; t < test_cases_size; t++) {
1287        int compare_result = suggestions.at(j).compare(
1288            base::ASCIIToUTF16(kTestCases[t].suggestion));
1289        EXPECT_FALSE(compare_result == 0) << kTestCases[t].suggestion <<
1290            " in " << kTestCases[i].locale;
1291      }
1292    }
1293  }
1294}
1295
1296// Check that the correct dictionary files are checked in.
1297TEST_F(SpellCheckTest, DictionaryFiles) {
1298  std::vector<std::string> spellcheck_languages;
1299  chrome::spellcheck_common::SpellCheckLanguages(&spellcheck_languages);
1300  EXPECT_FALSE(spellcheck_languages.empty());
1301
1302  base::FilePath hunspell = GetHunspellDirectory();
1303  for (size_t i = 0; i < spellcheck_languages.size(); ++i) {
1304    base::FilePath dict = chrome::spellcheck_common::GetVersionedFileName(
1305        spellcheck_languages[i], hunspell);
1306    EXPECT_TRUE(base::PathExists(dict)) << dict.value() << " not found";
1307  }
1308}
1309
1310// TODO(groby): Add a test for hunspell itself, when MAXWORDLEN is exceeded.
1311TEST_F(SpellCheckTest, SpellingEngine_CheckSpelling) {
1312  static const struct {
1313    const char* word;
1314    bool expected_result;
1315  } kTestCases[] = {
1316    { "", true },
1317    { "automatic", true },
1318    { "hello", true },
1319    { "forglobantic", false },
1320    { "xfdssfsdfaasds", false },
1321    {  // 64 chars are the longest word to check - this should fail checking.
1322      "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl",
1323      false
1324    },
1325    {  // Any word longer than 64 chars should be exempt from checking.
1326      "reallylongwordthatabsolutelyexceedsthespecifiedcharacterlimitabit",
1327      true
1328    }
1329  };
1330
1331  // Initialization magic - call InitializeIfNeeded twice. The first one simply
1332  // flags internal state that a dictionary was requested. The second one will
1333  // take the passed-in file and initialize hunspell with it. (The file was
1334  // passed to hunspell in the ctor for the test fixture).
1335  // This needs to be done since we need to ensure the SpellingEngine object
1336  // contained in |spellcheck_| from the test fixture does get initialized.
1337  // TODO(groby): Clean up this mess.
1338  InitializeIfNeeded();
1339  ASSERT_FALSE(InitializeIfNeeded());
1340
1341  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
1342    bool result = CheckSpelling(kTestCases[i].word, 0);
1343    EXPECT_EQ(kTestCases[i].expected_result, result) <<
1344        "Failed test for " << kTestCases[i].word;
1345  }
1346}
1347
1348// Chrome should not suggest "Othello" for "hellllo" or "identically" for
1349// "accidently".
1350TEST_F(SpellCheckTest, LogicalSuggestions) {
1351  static const struct {
1352    const char* misspelled;
1353    const char* suggestion;
1354  } kTestCases[] = {
1355    { "hellllo", "hello" },
1356    { "accidently", "accidentally" }
1357  };
1358
1359  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
1360    int misspelling_start = 0;
1361    int misspelling_length = 0;
1362    std::vector<base::string16> suggestions;
1363    EXPECT_FALSE(spell_check()->SpellCheckWord(
1364        base::ASCIIToUTF16(kTestCases[i].misspelled).c_str(),
1365        strlen(kTestCases[i].misspelled),
1366        0,
1367        &misspelling_start,
1368        &misspelling_length,
1369        &suggestions));
1370    EXPECT_GE(suggestions.size(), static_cast<size_t>(1));
1371    if (suggestions.size() > 0)
1372      EXPECT_EQ(suggestions[0], base::ASCIIToUTF16(kTestCases[i].suggestion));
1373  }
1374}
1375