1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/basictypes.h"
6#include "net/base/data_url.h"
7#include "testing/gtest/include/gtest/gtest.h"
8#include "url/gurl.h"
9
namespace {

// One row of the table driving DataURLTest.Parse. Rows are written with
// positional aggregate initialization, so the member order below is part of
// the contract with the test table.
struct ParseTestData {
  // The URL string handed to the parser (wrapped in a GURL by the test).
  const char* url;

  // Whether parsing |url| is expected to succeed.
  bool is_valid;

  // Expected outputs. The test only compares these when |is_valid| is true;
  // rows that expect failure leave them as empty strings.
  const char* mime_type;
  const char* charset;
  const char* data;
};

}  // namespace
21
22TEST(DataURLTest, Parse) {
23  const ParseTestData tests[] = {
24    { "data:",
25       false,
26       "",
27       "",
28       "" },
29
30    { "data:,",
31      true,
32      "text/plain",
33      "US-ASCII",
34      "" },
35
36    { "data:;base64,",
37      true,
38      "text/plain",
39      "US-ASCII",
40      "" },
41
42    { "data:;charset=,test",
43      false,
44      "",
45      "",
46      "" },
47
48    { "data:TeXt/HtMl,<b>x</b>",
49      true,
50      "text/html",
51      "US-ASCII",
52      "<b>x</b>" },
53
54    { "data:,foo",
55      true,
56      "text/plain",
57      "US-ASCII",
58      "foo" },
59
60    { "data:;base64,aGVsbG8gd29ybGQ=",
61      true,
62      "text/plain",
63      "US-ASCII",
64      "hello world" },
65
66    // Allow invalid mediatype for backward compatibility but set mime_type to
67    // "text/plain" instead of the invalid mediatype.
68    { "data:foo,boo",
69      true,
70      "text/plain",
71      "US-ASCII",
72      "boo" },
73
74    // When accepting an invalid mediatype, override charset with "US-ASCII"
75    { "data:foo;charset=UTF-8,boo",
76      true,
77      "text/plain",
78      "US-ASCII",
79      "boo" },
80
81    // Invalid mediatype. Includes a slash but the type part is not a token.
82    { "data:f(oo/bar;baz=1;charset=kk,boo",
83      true,
84      "text/plain",
85      "US-ASCII",
86      "boo" },
87
88    { "data:foo/bar;baz=1;charset=kk,boo",
89      true,
90      "foo/bar",
91      "kk",
92      "boo" },
93
94    { "data:foo/bar;charset=kk;baz=1,boo",
95      true,
96      "foo/bar",
97      "kk",
98      "boo" },
99
100    { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
101          "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
102      true,
103      "text/html",
104      "US-ASCII",
105      "<html><body><b>hello world</b></body></html>" },
106
107    { "data:text/html,<html><body><b>hello world</b></body></html>",
108      true,
109      "text/html",
110      "US-ASCII",
111      "<html><body><b>hello world</b></body></html>" },
112
113    // the comma cannot be url-escaped!
114    { "data:%2Cblah",
115      false,
116      "",
117      "",
118      "" },
119
120    // invalid base64 content
121    { "data:;base64,aGVs_-_-",
122      false,
123      "",
124      "",
125      "" },
126
127    // Spaces should be removed from non-text data URLs (we already tested
128    // spaces above).
129    { " bG8gd2  9ybGQ=",
137      true,
138      "text/plain",
139      "US-ASCII",
140      "hello world" },
141
142    // Other whitespace should also be removed from anything base-64 encoded.
143    { "data:;base64,aGVs bG8gd2  \n9ybGQ=",
144      true,
145      "text/plain",
146      "US-ASCII",
147      "hello world" },
148
149    // In base64 encoding, escaped whitespace should be stripped.
150    // (This test was taken from acid3)
151    // http://b/1054495
152    { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
153          "%20",
154      true,
155      "text/javascript",
156      "US-ASCII",
157      "d4 = 'four';" },
158
159    // Only unescaped whitespace should be stripped in non-base64.
160    // http://b/1157796
161    { "data:img/png,A  B  %20  %0A  C",
162      true,
163      "img/png",
164      "US-ASCII",
165      "AB \nC" },
166
167    { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
168      true,
169      "text/plain",
170      "utf-8",
171      "Hell\xC3\xB6" },
172
173    // Not sufficiently padded.
174    { "data:;base64,aGVsbG8gd29ybGQ",
175      true,
176      "text/plain",
177      "US-ASCII",
178      "hello world" },
179
180    // Bad encoding (truncated).
181    { "data:;base64,aGVsbG8gd29yb",
182      false,
183      "",
184      "",
185      "" },
186
187    // TODO(darin): add more interesting tests
188  };
189
190  for (size_t i = 0; i < arraysize(tests); ++i) {
191    std::string mime_type;
192    std::string charset;
193    std::string data;
194    bool ok =
195        net::DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
196    EXPECT_EQ(ok, tests[i].is_valid);
197    if (tests[i].is_valid) {
198      EXPECT_EQ(tests[i].mime_type, mime_type);
199      EXPECT_EQ(tests[i].charset, charset);
200      EXPECT_EQ(tests[i].data, data);
201    }
202  }
203}
204