data_url.h revision ab8f6f0bd665d3c1ff476eb06c58c42630e462d4
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef NET_BASE_DATA_URL_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define NET_BASE_DATA_URL_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_export.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class GURL;
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net {
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// See RFC 2397 for a complete description of the 'data' URL scheme.
//
// Briefly, a 'data' URL has the form:
//
//   data:[<mediatype>][;base64],<data>
//
// The <mediatype> is an Internet media type specification (with optional
// parameters).  The appearance of ";base64" means that the data is encoded as
// base64.  Without ";base64", the data (as a sequence of octets) is represented
// using ASCII encoding for octets inside the range of safe URL characters and
// using the standard %xx hex encoding of URLs for octets outside that range.
// If <mediatype> is omitted, it defaults to text/plain;charset=US-ASCII.  As a
// shorthand, "text/plain" can be omitted but the charset parameter supplied.
//
class NET_EXPORT DataURL {
 public:
  // Parses a 'data' URL into its component pieces.
  //
  // Parameters:
  //   url        The 'data' URL to parse.
  //   mime_type  Receives the media type, normalized to lowercase.
  //   charset    Receives the value of the charset parameter.
  //   data       Receives the decoded data (e.g., if the data URL specifies
  //              base64 encoding, then the returned data is base64 decoded,
  //              and any %-escaped bytes are unescaped).
  //              OPTIONAL: If |data| is NULL, then the <data> section will
  //              not be parsed or validated.
  //
  // Returns true on success and false on failure. On failure the output
  // variables may have been changed and may contain invalid data.
  //
  // Handling of malformed input:
  //
  // * If the media type value doesn't match the media-type production
  //   defined in RFC 7231, mime_type will be set to the default value
  //   "text/plain". We don't simply fail for this grammar violation since
  //   Chromium had been accepting such invalid values. For example, an <img>
  //   element with the src attribute set to a data URL with an invalid media
  //   type "image" (without a slash and subtype) had been displayed. However,
  //   the value this method stores in the mime_type argument can be used for
  //   generating other headers, etc., which could lead to a security
  //   vulnerability. We don't want to accept an arbitrary value and ask each
  //   caller to validate the return value.
  //
  // * If the charset parameter is specified but its value doesn't match the
  //   token production defined in RFC 7230, this method simply fails and
  //   returns false.
  //
  // * If there's any other grammar violation in the URL, this method returns
  //   false.
  //
  static bool Parse(const GURL& url,
                    std::string* mime_type,
                    std::string* charset,
                    std::string* data);
};
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace net
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // NET_BASE_DATA_URL_H_
68