1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/basictypes.h"
6#include "googleurl/src/gurl.h"
7#include "net/base/mime_sniffer.h"
8#include "testing/gtest/include/gtest/gtest.h"
9
10namespace net {
11
// One row of a table-driven sniffing test; rows are consumed by TestArray().
struct SnifferTest {
  // Bytes handed to the sniffer.  The length travels separately in
  // |content_len|, so the data may contain embedded NUL bytes.
  const char* content;

  // Number of bytes of |content| to sniff.
  size_t content_len;

  // URL spec the content is associated with; wrapped in a GURL by the caller.
  std::string url;

  // MIME type hint passed to the sniffer; several tables use "" here.
  std::string type_hint;

  // MIME type the sniffer is expected to write to its output argument.
  const char* mime_type;
};
19
20static void TestArray(SnifferTest* tests, size_t count) {
21  std::string mime_type;
22
23  for (size_t i = 0; i < count; ++i) {
24    SniffMimeType(tests[i].content,
25                       tests[i].content_len,
26                       GURL(tests[i].url),
27                       tests[i].type_hint,
28                       &mime_type);
29    EXPECT_EQ(tests[i].mime_type, mime_type);
30  }
31}
32
33// TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
34// so the error messages produced by test failures are more useful.
35static std::string SniffMimeType(const std::string& content,
36                                 const std::string& url,
37                                 const std::string& mime_type_hint) {
38  std::string mime_type;
39  SniffMimeType(content.data(), content.size(), GURL(url),
40                     mime_type_hint, &mime_type);
41  return mime_type;
42}
43
44TEST(MimeSnifferTest, BoundaryConditionsTest) {
45  std::string mime_type;
46  std::string type_hint;
47
48  char buf[] = {
49    'd', '\x1f', '\xFF'
50  };
51
52  GURL url;
53
54  SniffMimeType(buf, 0, url, type_hint, &mime_type);
55  EXPECT_EQ("text/plain", mime_type);
56  SniffMimeType(buf, 1, url, type_hint, &mime_type);
57  EXPECT_EQ("text/plain", mime_type);
58  SniffMimeType(buf, 2, url, type_hint, &mime_type);
59  EXPECT_EQ("application/octet-stream", mime_type);
60}
61
62TEST(MimeSnifferTest, BasicSniffingTest) {
63  SnifferTest tests[] = {
64    { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
65      "http://www.example.com/",
66      "", "text/html" },
67    { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
68      "http://www.example.com/foo.gif",
69      "application/octet-stream", "application/octet-stream" },
70    { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
71      "http://www.example.com/foo",
72      "text/plain", "image/gif" },
73    { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
74      "http://www.example.com/foo?param=tt.gif",
75      "", "application/octet-stream" },
76    { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
77      "http://www.example.com/foo",
78      "text/plain", "text/plain" },
79    { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
80      "http://www.example.com/foo",
81      "application/octet-stream", "application/octet-stream" },
82    { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
83      "http://www.example.com/foo",
84      "", "image/jpeg" },
85  };
86
87  TestArray(tests, arraysize(tests));
88}
89
90TEST(MimeSnifferTest, ChromeExtensionsTest) {
91  SnifferTest tests[] = {
92    // schemes
93    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
94      "http://www.example.com/foo.crx",
95      "", "application/x-chrome-extension" },
96    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
97      "https://www.example.com/foo.crx",
98      "", "application/x-chrome-extension" },
99    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
100      "ftp://www.example.com/foo.crx",
101      "", "application/x-chrome-extension" },
102
103    // some other mimetypes that should get converted
104    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
105      "http://www.example.com/foo.crx",
106      "text/plain", "application/x-chrome-extension" },
107    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
108      "http://www.example.com/foo.crx",
109      "application/octet-stream", "application/x-chrome-extension" },
110
111    // success edge cases
112    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
113      "http://www.example.com/foo.crx?query=string",
114      "", "application/x-chrome-extension" },
115    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
116      "http://www.example.com/foo..crx",
117      "", "application/x-chrome-extension" },
118
119    // wrong file extension
120    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
121      "http://www.example.com/foo.bin",
122      "", "application/octet-stream" },
123    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
124      "http://www.example.com/foo.bin?monkey",
125      "", "application/octet-stream" },
126    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
127      "invalid-url",
128      "", "application/octet-stream" },
129    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
130      "http://www.example.com",
131      "", "application/octet-stream" },
132    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
133      "http://www.example.com/",
134      "", "application/octet-stream" },
135    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
136      "http://www.example.com/foo",
137      "", "application/octet-stream" },
138    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
139      "http://www.example.com/foocrx",
140      "", "application/octet-stream" },
141    { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
142      "http://www.example.com/foo.crx.blech",
143      "", "application/octet-stream" },
144
145    // wrong magic
146    { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
147      "http://www.example.com/foo.crx?monkey",
148      "", "application/octet-stream" },
149    { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
150      "http://www.example.com/foo.crx?monkey",
151      "", "application/octet-stream" },
152  };
153
154  TestArray(tests, arraysize(tests));
155}
156
157TEST(MimeSnifferTest, MozillaCompatibleTest) {
158  SnifferTest tests[] = {
159    { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
160      "http://www.example.com/",
161      "", "text/html" },
162    { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
163      "http://www.example.com/",
164      "text/plain", "text/plain" },
165    { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
166      "http://www.example.com/foo",
167      "", "image/bmp" },
168    { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
169      "http://www.example.com/favicon.ico",
170      "", "application/octet-stream" },
171    { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
172      "http://www.example.com/foo",
173      "", "text/plain" },
174    { "From: Fred\nTo: Bob\n\nHi\n.\n",
175      sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
176      "http://www.example.com/foo",
177      "", "text/plain" },
178    { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
179      sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
180      "http://www.example.com/foo",
181      "", "text/xml" },
182    { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
183      sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
184      "http://www.example.com/foo",
185      "application/octet-stream", "application/octet-stream" },
186  };
187
188  TestArray(tests, arraysize(tests));
189}
190
191TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) {
192  SnifferTest tests[] = {
193    { "GIF87a\n<html>\n<body>"
194        "<script>alert('haxorzed');\n</script>"
195        "</body></html>\n",
196      sizeof("GIF87a\n<html>\n<body>"
197        "<script>alert('haxorzed');\n</script>"
198        "</body></html>\n")-1,
199      "http://www.example.com/foo",
200      "", "image/gif" },
201    { "GIF87a\n<html>\n<body>"
202        "<script>alert('haxorzed');\n</script>"
203        "</body></html>\n",
204      sizeof("GIF87a\n<html>\n<body>"
205        "<script>alert('haxorzed');\n</script>"
206        "</body></html>\n")-1,
207      "http://www.example.com/foo?q=ttt.html",
208      "", "image/gif" },
209    { "GIF87a\n<html>\n<body>"
210        "<script>alert('haxorzed');\n</script>"
211        "</body></html>\n",
212      sizeof("GIF87a\n<html>\n<body>"
213        "<script>alert('haxorzed');\n</script>"
214        "</body></html>\n")-1,
215      "http://www.example.com/foo#ttt.html",
216      "", "image/gif" },
217    { "a\n<html>\n<body>"
218        "<script>alert('haxorzed');\n</script>"
219        "</body></html>\n",
220      sizeof("a\n<html>\n<body>"
221        "<script>alert('haxorzed');\n</script>"
222        "</body></html>\n")-1,
223      "http://www.example.com/foo",
224      "", "text/plain" },
225    { "a\n<html>\n<body>"
226        "<script>alert('haxorzed');\n</script>"
227        "</body></html>\n",
228      sizeof("a\n<html>\n<body>"
229        "<script>alert('haxorzed');\n</script>"
230        "</body></html>\n")-1,
231      "http://www.example.com/foo?q=ttt.html",
232      "", "text/plain" },
233    { "a\n<html>\n<body>"
234        "<script>alert('haxorzed');\n</script>"
235        "</body></html>\n",
236      sizeof("a\n<html>\n<body>"
237        "<script>alert('haxorzed');\n</script>"
238        "</body></html>\n")-1,
239      "http://www.example.com/foo#ttt.html",
240      "", "text/plain" },
241    { "a\n<html>\n<body>"
242        "<script>alert('haxorzed');\n</script>"
243        "</body></html>\n",
244      sizeof("a\n<html>\n<body>"
245        "<script>alert('haxorzed');\n</script>"
246        "</body></html>\n")-1,
247      "http://www.example.com/foo.html",
248      "", "text/plain" },
249  };
250
251  TestArray(tests, arraysize(tests));
252}
253
254TEST(MimeSnifferTest, UnicodeTest) {
255  SnifferTest tests[] = {
256    { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
257      "http://www.example.com/foo",
258      "", "text/plain" },
259    { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
260      sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
261      "http://www.example.com/foo",
262      "", "text/plain" },
263    { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
264      sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
265      "http://www.example.com/foo",
266      "", "text/plain" },
267    { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
268      sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
269      "http://www.example.com/foo",
270      "", "text/plain" },
271  };
272
273  TestArray(tests, arraysize(tests));
274}
275
276TEST(MimeSnifferTest, FlashTest) {
277  SnifferTest tests[] = {
278    { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
279      "http://www.example.com/foo",
280      "", "application/octet-stream" },
281    { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
282      "http://www.example.com/foo?q=ttt.swf",
283      "", "application/octet-stream" },
284    { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
285      "http://www.example.com/foo#ttt.swf",
286      "", "application/octet-stream" },
287    { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
288      "http://www.example.com/foo.swf",
289      "", "text/plain" },
290    { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
291      "http://www.example.com/foo/bar.swf",
292      "", "application/octet-stream" },
293    { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
294      "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
295      "", "application/octet-stream" },
296    { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
297      "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
298      "text/plain", "application/octet-stream" },
299  };
300
301  TestArray(tests, arraysize(tests));
302}
303
304TEST(MimeSnifferTest, XMLTest) {
305  // An easy feed to identify.
306  EXPECT_EQ("application/atom+xml",
307            SniffMimeType("<?xml?><feed", "", "text/xml"));
308  // Don't sniff out of plain text.
309  EXPECT_EQ("text/plain",
310            SniffMimeType("<?xml?><feed", "", "text/plain"));
311  // Simple RSS.
312  EXPECT_EQ("application/rss+xml",
313            SniffMimeType("<?xml version='1.0'?>\r\n<rss", "", "text/xml"));
314
315  // The top of CNN's RSS feed, which we'd like to recognize as RSS.
316  static const char kCNNRSS[] =
317      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
318      "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
319      "type=\"text/xsl\" media=\"screen\"?>"
320      "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
321      "type=\"text/css\" media=\"screen\"?>"
322      "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
323      "version=\"2.0\">";
324  // CNN's RSS
325  EXPECT_EQ("application/rss+xml",
326            SniffMimeType(kCNNRSS, "", "text/xml"));
327  EXPECT_EQ("text/plain",
328            SniffMimeType(kCNNRSS, "", "text/plain"));
329
330  // Don't sniff random XML as something different.
331  EXPECT_EQ("text/xml",
332            SniffMimeType("<?xml?><notafeed", "", "text/xml"));
333  // Don't sniff random plain-text as something different.
334  EXPECT_EQ("text/plain",
335            SniffMimeType("<?xml?><notafeed", "", "text/plain"));
336
337  // Positive test for the two instances we upgrade to XHTML.
338  EXPECT_EQ("application/xhtml+xml",
339            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
340                          "", "text/xml"));
341  EXPECT_EQ("application/xhtml+xml",
342            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
343                          "", "application/xml"));
344
345  // Following our behavior with HTML, don't call other mime types XHTML.
346  EXPECT_EQ("text/plain",
347            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
348                          "", "text/plain"));
349  EXPECT_EQ("application/rss+xml",
350            SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
351                          "", "application/rss+xml"));
352
353  // Don't sniff other HTML-looking bits as HTML.
354  EXPECT_EQ("text/xml",
355            SniffMimeType("<html><head>", "", "text/xml"));
356  EXPECT_EQ("text/xml",
357            SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
358                          "", "text/xml"));
359
360}
361
362// Test content which is >= 1024 bytes, and includes no open angle bracket.
363// http://code.google.com/p/chromium/issues/detail?id=3521
364TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) {
365  // Make a large input, with 1024 bytes of "x".
366  std::string content;
367  content.resize(1024);
368  std::fill(content.begin(), content.end(), 'x');
369
370  // content.size() >= 1024 so the sniff is unambiguous.
371  std::string mime_type;
372  EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
373                            "text/xml", &mime_type));
374  EXPECT_EQ("text/xml", mime_type);
375}
376
377// Test content which is >= 1024 bytes, and includes a binary looking byte.
378// http://code.google.com/p/chromium/issues/detail?id=15314
379TEST(MimeSnifferTest, LooksBinary) {
380  // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
381  std::string content;
382  content.resize(1024);
383  std::fill(content.begin(), content.end(), 'x');
384  content[1000] = 0x01;
385
386  // content.size() >= 1024 so the sniff is unambiguous.
387  std::string mime_type;
388  EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
389                            "text/plain", &mime_type));
390  EXPECT_EQ("application/octet-stream", mime_type);
391}
392
393}  // namespace net
394