1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/bind.h"
6#include "base/file_util.h"
7#include "base/files/scoped_temp_dir.h"
8#include "base/prefs/pref_service.h"
9#include "chrome/browser/character_encoding.h"
10#include "chrome/browser/net/url_request_mock_util.h"
11#include "chrome/browser/profiles/profile.h"
12#include "chrome/browser/ui/browser.h"
13#include "chrome/browser/ui/browser_commands.h"
14#include "chrome/browser/ui/tabs/tab_strip_model.h"
15#include "chrome/common/pref_names.h"
16#include "chrome/test/base/in_process_browser_test.h"
17#include "chrome/test/base/ui_test_utils.h"
18#include "content/public/browser/browser_thread.h"
19#include "content/public/browser/download_manager.h"
20#include "content/public/browser/navigation_controller.h"
21#include "content/public/browser/notification_service.h"
22#include "content/public/browser/notification_source.h"
23#include "content/public/browser/notification_types.h"
24#include "content/public/browser/web_contents.h"
25#include "content/public/test/test_navigation_observer.h"
26#include "content/test/net/url_request_mock_http_job.h"
27
28namespace {
29
// One row of the alias-mapping table: a test page together with the encoding
// name the tab is expected to report after that page has been loaded.
struct EncodingTestData {
  // Name of the HTML file to load.
  const char* file_name;

  // Encoding name the loaded page is expected to report.
  const char* encoding_name;
};
34
35const EncodingTestData kEncodingTestDatas[] = {
36  { "Big5.html", "Big5" },
37  { "EUC-JP.html", "EUC-JP" },
38  { "gb18030.html", "gb18030" },
39  { "iso-8859-1.html", "ISO-8859-1" },
40  { "ISO-8859-2.html", "ISO-8859-2" },
41  { "ISO-8859-4.html", "ISO-8859-4" },
42  { "ISO-8859-5.html", "ISO-8859-5" },
43  { "ISO-8859-6.html", "ISO-8859-6" },
44  { "ISO-8859-7.html", "ISO-8859-7" },
45  { "ISO-8859-8.html", "ISO-8859-8" },
46  { "ISO-8859-13.html", "ISO-8859-13" },
47  { "ISO-8859-15.html", "ISO-8859-15" },
48  { "KOI8-R.html", "KOI8-R" },
49  { "KOI8-U.html", "KOI8-U" },
50  { "macintosh.html", "macintosh" },
51  { "Shift-JIS.html", "Shift_JIS" },
52  { "US-ASCII.html", "ISO-8859-1" },  // http://crbug.com/15801
53  { "UTF-8.html", "UTF-8" },
54  { "UTF-16LE.html", "UTF-16LE" },
55  { "windows-874.html", "windows-874" },
56  { "EUC-KR.html", "EUC-KR" },
57  { "windows-1250.html", "windows-1250" },
58  { "windows-1251.html", "windows-1251" },
59  { "windows-1252.html", "windows-1252" },
60  { "windows-1253.html", "windows-1253" },
61  { "windows-1254.html", "windows-1254" },
62  { "windows-1255.html", "windows-1255" },
63  { "windows-1256.html", "windows-1256" },
64  { "windows-1257.html", "windows-1257" },
65  { "windows-1258.html", "windows-1258" }
66};
67
68class SavePackageFinishedObserver : public content::DownloadManager::Observer {
69 public:
70  SavePackageFinishedObserver(content::DownloadManager* manager,
71                              const base::Closure& callback)
72      : download_manager_(manager),
73        callback_(callback) {
74    download_manager_->AddObserver(this);
75  }
76
77  virtual ~SavePackageFinishedObserver() {
78    if (download_manager_)
79      download_manager_->RemoveObserver(this);
80  }
81
82  // DownloadManager::Observer:
83  virtual void OnSavePackageSuccessfullyFinished(
84      content::DownloadManager* manager, content::DownloadItem* item) OVERRIDE {
85    callback_.Run();
86  }
87  virtual void ManagerGoingDown(content::DownloadManager* manager) OVERRIDE {
88    download_manager_->RemoveObserver(this);
89    download_manager_ = NULL;
90  }
91
92 private:
93  content::DownloadManager* download_manager_;
94  base::Closure callback_;
95
96  DISALLOW_COPY_AND_ASSIGN(SavePackageFinishedObserver);
97};
98
99}  // namespace
100
101using content::BrowserThread;
102
103static const base::FilePath::CharType* kTestDir =
104    FILE_PATH_LITERAL("encoding_tests");
105
106class BrowserEncodingTest
107    : public InProcessBrowserTest,
108      public testing::WithParamInterface<EncodingTestData> {
109 protected:
110  BrowserEncodingTest() {}
111
112  // Saves the current page and verifies that the output matches the expected
113  // result.
114  void SaveAndCompare(const char* filename_to_write,
115                      const base::FilePath& expected) {
116    // Dump the page, the content of dump page should be identical to the
117    // expected result file.
118    base::FilePath full_file_name = save_dir_.AppendASCII(filename_to_write);
119    // We save the page as way of complete HTML file, which requires a directory
120    // name to save sub resources in it. Although this test file does not have
121    // sub resources, but the directory name is still required.
122    scoped_refptr<content::MessageLoopRunner> loop_runner(
123        new content::MessageLoopRunner);
124    SavePackageFinishedObserver observer(
125        content::BrowserContext::GetDownloadManager(browser()->profile()),
126        loop_runner->QuitClosure());
127    browser()->tab_strip_model()->GetActiveWebContents()->SavePage(
128        full_file_name, temp_sub_resource_dir_,
129        content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
130    loop_runner->Run();
131
132    base::FilePath expected_file_name = ui_test_utils::GetTestFilePath(
133        base::FilePath(kTestDir), expected);
134
135    EXPECT_TRUE(base::ContentsEqual(full_file_name, expected_file_name));
136  }
137
138  virtual void SetUpOnMainThread() OVERRIDE {
139    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
140    save_dir_ = temp_dir_.path();
141    temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files");
142
143    BrowserThread::PostTask(
144        BrowserThread::IO, FROM_HERE,
145        base::Bind(&chrome_browser_net::SetUrlRequestMocksEnabled, true));
146  }
147
148  base::ScopedTempDir temp_dir_;
149  base::FilePath save_dir_;
150  base::FilePath temp_sub_resource_dir_;
151};
152
// TODO(jnd): 1. Some encodings are missing here. They'll be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. WebKit layout tests cover some, but testing in the UI test is
// also necessary.
158IN_PROC_BROWSER_TEST_P(BrowserEncodingTest, TestEncodingAliasMapping) {
159  const char* const kAliasTestDir = "alias_mapping";
160
161  base::FilePath test_dir_path = base::FilePath(kTestDir).AppendASCII(
162      kAliasTestDir);
163  base::FilePath test_file_path(test_dir_path);
164  test_file_path = test_file_path.AppendASCII(
165      GetParam().file_name);
166
167  GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_file_path);
168  ui_test_utils::NavigateToURL(browser(), url);
169  EXPECT_EQ(GetParam().encoding_name,
170            browser()->tab_strip_model()->GetActiveWebContents()->
171                GetEncoding());
172}
173
174INSTANTIATE_TEST_CASE_P(EncodingAliases,
175                        BrowserEncodingTest,
176                        testing::ValuesIn(kEncodingTestDatas));
177
// Marked as flaky (see http://crbug.com/44668), although the test below is
// declared without a FLAKY_ or DISABLED_ prefix.
179IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, TestOverrideEncoding) {
180  const char* const kTestFileName = "gb18030_with_iso88591_meta.html";
181  const char* const kExpectedFileName =
182      "expected_gb18030_saved_from_iso88591_meta.html";
183  const char* const kOverrideTestDir = "user_override";
184
185  base::FilePath test_dir_path =
186      base::FilePath(kTestDir).AppendASCII(kOverrideTestDir);
187  test_dir_path = test_dir_path.AppendASCII(kTestFileName);
188  GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_dir_path);
189  ui_test_utils::NavigateToURL(browser(), url);
190  content::WebContents* web_contents =
191      browser()->tab_strip_model()->GetActiveWebContents();
192  EXPECT_EQ("ISO-8859-1", web_contents->GetEncoding());
193
194  // Override the encoding to "gb18030".
195  const std::string selected_encoding =
196      CharacterEncoding::GetCanonicalEncodingNameByAliasName("gb18030");
197  content::TestNavigationObserver navigation_observer(web_contents);
198  web_contents->SetOverrideEncoding(selected_encoding);
199  navigation_observer.Wait();
200  EXPECT_EQ("gb18030", web_contents->GetEncoding());
201
202  base::FilePath expected_filename =
203      base::FilePath().AppendASCII(kOverrideTestDir).AppendASCII(
204          kExpectedFileName);
205  SaveAndCompare(kTestFileName, expected_filename);
206}
207
208// The following encodings are excluded from the auto-detection test because
209// it's a known issue that the current encoding detector does not detect them:
210// ISO-8859-4
211// ISO-8859-13
212// KOI8-U
213// macintosh
214// windows-874
215// windows-1252
216// windows-1253
217// windows-1257
218// windows-1258
219
220// For Hebrew, the expected encoding value is ISO-8859-8-I. See
221// http://crbug.com/2927 for more details.
222//
223// This test fails frequently on the win_rel trybot. See http://crbug.com/122053
224// It also times out frequently on Mac dbg. See http://crbug.com/351325
225#if defined(OS_WIN) || defined(OS_MACOSX)
226#define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
227#else
228#define MAYBE_TestEncodingAutoDetect TestEncodingAutoDetect
229#endif
230// TODO(phajdan.jr): See if fix for http://crbug.com/122053 would help here.
231IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) {
232  struct EncodingAutoDetectTestData {
233    const char* test_file_name;   // File name of test data.
234    const char* expected_result;  // File name of expected results.
235    const char* expected_encoding;   // expected encoding.
236  };
237  const EncodingAutoDetectTestData kTestDatas[] = {
238      { "Big5_with_no_encoding_specified.html",
239        "expected_Big5_saved_from_no_encoding_specified.html",
240        "Big5" },
241      { "gb18030_with_no_encoding_specified.html",
242        "expected_gb18030_saved_from_no_encoding_specified.html",
243        "gb18030" },
244      { "iso-8859-1_with_no_encoding_specified.html",
245        "expected_iso-8859-1_saved_from_no_encoding_specified.html",
246        "ISO-8859-1" },
247      { "ISO-8859-5_with_no_encoding_specified.html",
248        "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
249        "ISO-8859-5" },
250      { "ISO-8859-6_with_no_encoding_specified.html",
251        "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
252        "ISO-8859-6" },
253      { "ISO-8859-7_with_no_encoding_specified.html",
254        "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
255        "ISO-8859-7" },
256      { "ISO-8859-8_with_no_encoding_specified.html",
257        "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
258        "ISO-8859-8-I" },
259      { "KOI8-R_with_no_encoding_specified.html",
260        "expected_KOI8-R_saved_from_no_encoding_specified.html",
261        "KOI8-R" },
262      { "Shift-JIS_with_no_encoding_specified.html",
263        "expected_Shift-JIS_saved_from_no_encoding_specified.html",
264        "Shift_JIS" },
265      { "UTF-8_with_no_encoding_specified.html",
266        "expected_UTF-8_saved_from_no_encoding_specified.html",
267        "UTF-8" },
268      { "EUC-KR_with_no_encoding_specified.html",
269        "expected_EUC-KR_saved_from_no_encoding_specified.html",
270        "EUC-KR" },
271      { "windows-1251_with_no_encoding_specified.html",
272        "expected_windows-1251_saved_from_no_encoding_specified.html",
273        "windows-1251" },
274      { "windows-1254_with_no_encoding_specified.html",
275        "expected_windows-1254_saved_from_no_encoding_specified.html",
276        "windows-1254" },
277      { "windows-1255_with_no_encoding_specified.html",
278        "expected_windows-1255_saved_from_no_encoding_specified.html",
279        "windows-1255" },
280      { "windows-1256_with_no_encoding_specified.html",
281        "expected_windows-1256_saved_from_no_encoding_specified.html",
282        "windows-1256" }
283    };
284  const char* const kAutoDetectDir = "auto_detect";
285  // Directory of the files of expected results.
286  const char* const kExpectedResultDir = "expected_results";
287
288  base::FilePath test_dir_path =
289      base::FilePath(kTestDir).AppendASCII(kAutoDetectDir);
290
291  // Set the default charset to one of encodings not supported by the current
292  // auto-detector (Please refer to the above comments) to make sure we
293  // incorrectly decode the page. Now we use ISO-8859-4.
294  browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset,
295                                              "ISO-8859-4");
296
297  content::WebContents* web_contents =
298      browser()->tab_strip_model()->GetActiveWebContents();
299  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas); ++i) {
300    // Disable auto detect if it is on.
301    browser()->profile()->GetPrefs()->SetBoolean(
302        prefs::kWebKitUsesUniversalDetector, false);
303
304    base::FilePath test_file_path(test_dir_path);
305    test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name);
306    GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_file_path);
307    ui_test_utils::NavigateToURL(browser(), url);
308
309    // Get the encoding used for the page, it must be the default charset we
310    // just set.
311    EXPECT_EQ("ISO-8859-4", web_contents->GetEncoding());
312
313    // Enable the encoding auto detection.
314    browser()->profile()->GetPrefs()->SetBoolean(
315        prefs::kWebKitUsesUniversalDetector, true);
316
317    content::TestNavigationObserver observer(web_contents);
318    chrome::Reload(browser(), CURRENT_TAB);
319    observer.Wait();
320
321    // Re-get the encoding of page. It should return the real encoding now.
322    EXPECT_EQ(kTestDatas[i].expected_encoding, web_contents->GetEncoding());
323
324    // Dump the page, the content of dump page should be equal with our expect
325    // result file.
326    base::FilePath expected_result_file_name =
327        base::FilePath().AppendASCII(kAutoDetectDir).
328        AppendASCII(kExpectedResultDir).
329        AppendASCII(kTestDatas[i].expected_result);
330    SaveAndCompare(kTestDatas[i].test_file_name, expected_result_file_name);
331  }
332}
333