browser_encoding_uitest.cc revision ddb351dbec246cf1fab5ec20d2d5520909041de1
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4#include <string>
5
6#include "base/file_util.h"
7#include "base/memory/scoped_temp_dir.h"
8#include "chrome/browser/net/url_request_mock_http_job.h"
9#include "chrome/browser/download/save_package.h"
10#include "chrome/common/pref_names.h"
11#include "chrome/test/automation/browser_proxy.h"
12#include "chrome/test/automation/tab_proxy.h"
13#include "chrome/test/ui/ui_test.h"
14#include "chrome/test/ui_test_utils.h"
15
16static const FilePath::CharType* kTestDir = FILE_PATH_LITERAL("encoding_tests");
17
18class BrowserEncodingTest : public UITest {
19 protected:
20  BrowserEncodingTest() : UITest() {}
21
22  // Make sure the content of the page are as expected
23  // after override or auto-detect
24  void CheckFile(const FilePath& generated_file,
25                 const FilePath& expected_result_file,
26                 bool check_equal) {
27    FilePath expected_result_filepath = ui_test_utils::GetTestFilePath(
28        FilePath(kTestDir), expected_result_file);
29
30    ASSERT_TRUE(file_util::PathExists(expected_result_filepath));
31    WaitForGeneratedFileAndCheck(generated_file,
32                                 expected_result_filepath,
33                                 true,  // We do care whether they are equal.
34                                 check_equal,
35                                 true);  // Delete the generated file when done.
36  }
37
38  virtual void SetUp() {
39    UITest::SetUp();
40    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
41    save_dir_ = temp_dir_.path();
42    temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files");
43  }
44
45  ScopedTempDir temp_dir_;
46  FilePath save_dir_;
47  FilePath temp_sub_resource_dir_;
48};
49
// TODO(jnd): 1. Some encodings are missing here. They'll be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. WebKit layout tests cover some, but testing in the UI test is
// also necessary.
55TEST_F(BrowserEncodingTest, TestEncodingAliasMapping) {
56  struct EncodingTestData {
57    const char* file_name;
58    const char* encoding_name;
59  };
60
61  const EncodingTestData kEncodingTestDatas[] = {
62    { "Big5.html", "Big5" },
63    { "EUC-JP.html", "EUC-JP" },
64    { "gb18030.html", "gb18030" },
65    { "iso-8859-1.html", "ISO-8859-1" },
66    { "ISO-8859-2.html", "ISO-8859-2" },
67    { "ISO-8859-4.html", "ISO-8859-4" },
68    { "ISO-8859-5.html", "ISO-8859-5" },
69    { "ISO-8859-6.html", "ISO-8859-6" },
70    { "ISO-8859-7.html", "ISO-8859-7" },
71    { "ISO-8859-8.html", "ISO-8859-8" },
72    { "ISO-8859-13.html", "ISO-8859-13" },
73    { "ISO-8859-15.html", "ISO-8859-15" },
74    { "KOI8-R.html", "KOI8-R" },
75    { "KOI8-U.html", "KOI8-U" },
76    { "macintosh.html", "macintosh" },
77    { "Shift-JIS.html", "Shift_JIS" },
78    { "US-ASCII.html", "ISO-8859-1" },  // http://crbug.com/15801
79    { "UTF-8.html", "UTF-8" },
80    { "UTF-16LE.html", "UTF-16LE" },
81    { "windows-874.html", "windows-874" },
82    { "windows-949.html", "windows-949" },
83    { "windows-1250.html", "windows-1250" },
84    { "windows-1251.html", "windows-1251" },
85    { "windows-1252.html", "windows-1252" },
86    { "windows-1253.html", "windows-1253" },
87    { "windows-1254.html", "windows-1254" },
88    { "windows-1255.html", "windows-1255" },
89    { "windows-1256.html", "windows-1256" },
90    { "windows-1257.html", "windows-1257" },
91    { "windows-1258.html", "windows-1258" }
92  };
93  const char* const kAliasTestDir = "alias_mapping";
94
95  scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
96  ASSERT_TRUE(tab_proxy.get());
97
98  FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAliasTestDir);
99  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEncodingTestDatas); ++i) {
100    FilePath test_file_path(test_dir_path);
101    test_file_path = test_file_path.AppendASCII(
102        kEncodingTestDatas[i].file_name);
103
104    NavigateToURL(URLRequestMockHTTPJob::GetMockUrl(test_file_path));
105
106    std::string encoding;
107    EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding));
108    EXPECT_EQ(encoding, kEncodingTestDatas[i].encoding_name);
109  }
110}
111
112// Marked as flaky: see  http://crbug.com/44668
113TEST_F(BrowserEncodingTest, FLAKY_TestOverrideEncoding) {
114  const char* const kTestFileName = "gb18030_with_iso88591_meta.html";
115  const char* const kExpectedFileName =
116      "expected_gb18030_saved_from_iso88591_meta.html";
117  const char* const kOverrideTestDir = "user_override";
118
119  FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kOverrideTestDir);
120  test_dir_path = test_dir_path.AppendASCII(kTestFileName);
121  GURL url = URLRequestMockHTTPJob::GetMockUrl(test_dir_path);
122  scoped_refptr<TabProxy> tab_proxy(GetActiveTab());
123  ASSERT_TRUE(tab_proxy.get());
124  ASSERT_TRUE(tab_proxy->NavigateToURL(url));
125  WaitUntilTabCount(1);
126
127  // Get the encoding declared in the page.
128  std::string encoding;
129  EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding));
130  EXPECT_EQ(encoding, "ISO-8859-1");
131
132  // Override the encoding to "gb18030".
133  int64 last_nav_time = 0;
134  EXPECT_TRUE(tab_proxy->GetLastNavigationTime(&last_nav_time));
135  EXPECT_TRUE(tab_proxy->OverrideEncoding("gb18030"));
136  EXPECT_TRUE(tab_proxy->WaitForNavigation(last_nav_time));
137
138  // Re-get the encoding of page. It should be gb18030.
139  EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding));
140  EXPECT_EQ(encoding, "gb18030");
141
142  // Dump the page, the content of dump page should be identical to the
143  // expected result file.
144  FilePath full_file_name = save_dir_.AppendASCII(kTestFileName);
145  // We save the page as way of complete HTML file, which requires a directory
146  // name to save sub resources in it. Although this test file does not have
147  // sub resources, but the directory name is still required.
148  EXPECT_TRUE(tab_proxy->SavePage(full_file_name, temp_sub_resource_dir_,
149                                  SavePackage::SAVE_AS_COMPLETE_HTML));
150  scoped_refptr<BrowserProxy> browser(automation()->GetBrowserWindow(0));
151  ASSERT_TRUE(browser.get());
152  EXPECT_TRUE(WaitForDownloadShelfVisible(browser.get()));
153  FilePath expected_file_name = FilePath().AppendASCII(kOverrideTestDir);
154  expected_file_name = expected_file_name.AppendASCII(kExpectedFileName);
155  CheckFile(full_file_name, expected_file_name, true);
156}
157
158// The following encodings are excluded from the auto-detection test because
159// it's a known issue that the current encoding detector does not detect them:
160// ISO-8859-4
161// ISO-8859-13
162// KOI8-U
163// macintosh
164// windows-874
165// windows-1252
166// windows-1253
167// windows-1257
168// windows-1258
169
170// For Hebrew, the expected encoding value is ISO-8859-8-I. See
171// http://crbug.com/2927 for more details.
172// FLAKY / Disabled on CrOS: see http://crbug.com/44666
173#if defined(OS_CHROMEOS)
174#define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
175#else
176#define MAYBE_TestEncodingAutoDetect FLAKY_TestEncodingAutoDetect
177#endif
178
179TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) {
180  struct EncodingAutoDetectTestData {
181    const char* test_file_name;   // File name of test data.
182    const char* expected_result;  // File name of expected results.
183    const char* expected_encoding;   // expected encoding.
184  };
185  const EncodingAutoDetectTestData kTestDatas[] = {
186      { "Big5_with_no_encoding_specified.html",
187        "expected_Big5_saved_from_no_encoding_specified.html",
188        "Big5" },
189      { "gb18030_with_no_encoding_specified.html",
190        "expected_gb18030_saved_from_no_encoding_specified.html",
191        "gb18030" },
192      { "iso-8859-1_with_no_encoding_specified.html",
193        "expected_iso-8859-1_saved_from_no_encoding_specified.html",
194        "ISO-8859-1" },
195      { "ISO-8859-5_with_no_encoding_specified.html",
196        "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
197        "ISO-8859-5" },
198      { "ISO-8859-6_with_no_encoding_specified.html",
199        "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
200        "ISO-8859-6" },
201      { "ISO-8859-7_with_no_encoding_specified.html",
202        "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
203        "ISO-8859-7" },
204      { "ISO-8859-8_with_no_encoding_specified.html",
205        "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
206        "ISO-8859-8-I" },
207      { "KOI8-R_with_no_encoding_specified.html",
208        "expected_KOI8-R_saved_from_no_encoding_specified.html",
209        "KOI8-R" },
210      { "Shift-JIS_with_no_encoding_specified.html",
211        "expected_Shift-JIS_saved_from_no_encoding_specified.html",
212        "Shift_JIS" },
213      { "UTF-8_with_no_encoding_specified.html",
214        "expected_UTF-8_saved_from_no_encoding_specified.html",
215        "UTF-8" },
216      { "windows-949_with_no_encoding_specified.html",
217        "expected_windows-949_saved_from_no_encoding_specified.html",
218        "windows-949" },
219      { "windows-1251_with_no_encoding_specified.html",
220        "expected_windows-1251_saved_from_no_encoding_specified.html",
221        "windows-1251" },
222      { "windows-1254_with_no_encoding_specified.html",
223        "expected_windows-1254_saved_from_no_encoding_specified.html",
224        "windows-1254" },
225      { "windows-1255_with_no_encoding_specified.html",
226        "expected_windows-1255_saved_from_no_encoding_specified.html",
227        "windows-1255" },
228      { "windows-1256_with_no_encoding_specified.html",
229        "expected_windows-1256_saved_from_no_encoding_specified.html",
230        "windows-1256" }
231    };
232  const char* const kAutoDetectDir = "auto_detect";
233  // Directory of the files of expected results.
234  const char* const kExpectedResultDir = "expected_results";
235
236  // Full path of saved file. full_file_name = save_dir_ + file_name[i];
237  FilePath full_saved_file_name;
238
239  FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAutoDetectDir);
240
241  scoped_refptr<BrowserProxy> browser(automation()->GetBrowserWindow(0));
242  ASSERT_TRUE(browser.get());
243  // Set the default charset to one of encodings not supported by the current
244  // auto-detector (Please refer to the above comments) to make sure we
245  // incorrectly decode the page. Now we use ISO-8859-4.
246  ASSERT_TRUE(browser->SetStringPreference(prefs::kDefaultCharset,
247                                           "ISO-8859-4"));
248  scoped_refptr<TabProxy> tab(GetActiveTab());
249  ASSERT_TRUE(tab.get());
250
251  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas);i++) {
252    FilePath test_file_path(test_dir_path);
253    test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name);
254    GURL url =
255        URLRequestMockHTTPJob::GetMockUrl(test_file_path);
256    ASSERT_TRUE(tab->NavigateToURL(url));
257
258    // Disable auto detect if it is on.
259    EXPECT_TRUE(
260        browser->SetBooleanPreference(prefs::kWebKitUsesUniversalDetector,
261                                      false));
262    EXPECT_TRUE(tab->Reload());
263
264    // Get the encoding used for the page, it must be the default charset we
265    // just set.
266    std::string encoding;
267    EXPECT_TRUE(tab->GetPageCurrentEncoding(&encoding));
268    EXPECT_EQ(encoding, "ISO-8859-4");
269
270    // Enable the encoding auto detection.
271    EXPECT_TRUE(browser->SetBooleanPreference(
272        prefs::kWebKitUsesUniversalDetector, true));
273    EXPECT_TRUE(tab->Reload());
274
275    // Re-get the encoding of page. It should return the real encoding now.
276    bool encoding_auto_detect = false;
277    EXPECT_TRUE(
278        browser->GetBooleanPreference(prefs::kWebKitUsesUniversalDetector,
279                                      &encoding_auto_detect));
280    EXPECT_TRUE(encoding_auto_detect);
281    EXPECT_TRUE(tab->GetPageCurrentEncoding(&encoding));
282    EXPECT_EQ(encoding, kTestDatas[i].expected_encoding);
283
284    // Dump the page, the content of dump page should be equal with our expect
285    // result file.
286    full_saved_file_name = save_dir_.AppendASCII(kTestDatas[i].test_file_name);
287    // Full path of expect result file.
288    FilePath expected_result_file_name = FilePath().AppendASCII(kAutoDetectDir);
289    expected_result_file_name = expected_result_file_name.AppendASCII(
290        kExpectedResultDir);
291    expected_result_file_name = expected_result_file_name.AppendASCII(
292        kTestDatas[i].expected_result);
293    EXPECT_TRUE(tab->SavePage(full_saved_file_name, temp_sub_resource_dir_,
294                              SavePackage::SAVE_AS_COMPLETE_HTML));
295    EXPECT_TRUE(WaitForDownloadShelfVisible(browser.get()));
296    CheckFile(full_saved_file_name, expected_result_file_name, true);
297  }
298}
299