// data_url.h revision ab8f6f0bd665d3c1ff476eb06c58c42630e462d4
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef NET_BASE_DATA_URL_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define NET_BASE_DATA_URL_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_export.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class GURL; 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net { 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// See RFC 2397 for a complete description of the 'data' URL scheme. 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Briefly, a 'data' URL has the form: 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// data:[<mediatype>][;base64],<data> 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The <mediatype> is an Internet media type specification (with optional 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// parameters.) 
The appearance of ";base64" means that the data is encoded as 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// base64. Without ";base64", the data (as a sequence of octets) is represented 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// using ASCII encoding for octets inside the range of safe URL characters and 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// using the standard %xx hex encoding of URLs for octets outside that range. 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If <mediatype> is omitted, it defaults to text/plain;charset=US-ASCII. As a 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// shorthand, "text/plain" can be omitted but the charset parameter supplied. 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class NET_EXPORT DataURL { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This method can be used to parse a 'data' URL into its component pieces. 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The resulting mime_type is normalized to lowercase. The data is the 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // decoded data (e.g.., if the data URL specifies base64 encoding, then the 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // returned data is base64 decoded, and any %-escaped bytes are unescaped). 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 38ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // If the media type value doesn't match the media-type production defined in 39ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // RFC 7231, mime_type will be set to the default value "text/plain". 
We 40ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // don't simply fail for this grammar violation since Chromium had been 41ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // accepting such invalid values. For example, <img> element with the src 42ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // attribute set to a data URL with an invalid media type "image" (without a 43ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // slash and subtype) had been displayed. However, the value this method will 44ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // store in mime_type argument can be used for generating other headers, etc. 45ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // This could lead to security vulnerability. We don't want to accept 46ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // arbitrary value and ask each caller to validate the return value. 47ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // 48ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // If the charset parameter is specified but its value doesn't match the 49ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // token production defined in RFC 7230, this method simply fails and returns 50ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // false. 51ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // 52ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // If there's any other grammar violation in the URL, then this method will 53ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // return false. Output variables may be changed and contain invalid data. On 54ab8f6f0bd665d3c1ff476eb06c58c42630e462d4Ben Murdoch // success, true is returned. 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // OPTIONAL: If |data| is NULL, then the <data> section will not be parsed 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // or validated. 
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static bool Parse(const GURL& url, 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string* mime_type, 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string* charset, 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string* data); 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace net 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // NET_BASE_DATA_URL_H_ 68