1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file. 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// NOTE: based loosely on mozilla's nsDataChannel.cpp 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <algorithm> 8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/data_url.h" 10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/base64.h" 12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_util.h" 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/gurl.h" 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/escape.h" 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace net { 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// static 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DataURL::Parse(const GURL& url, std::string* mime_type, 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string* charset, std::string* data) { 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string::const_iterator begin = url.spec().begin(); 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string::const_iterator end = url.spec().end(); 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string::const_iterator after_colon = std::find(begin, end, ':'); 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (after_colon == end) 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ++after_colon; 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // first, find the start of the data 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string::const_iterator comma = std::find(after_colon, end, ','); 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (comma == end) 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char kBase64Tag[] = ";base64"; 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string::const_iterator it = 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::search(after_colon, comma, kBase64Tag, 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott kBase64Tag + sizeof(kBase64Tag)-1); 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool base64_encoded = (it != comma); 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (comma != after_colon) { 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // everything else is content type 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string::const_iterator semi_colon = std::find(after_colon, comma, ';'); 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (semi_colon != after_colon) { 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott mime_type->assign(after_colon, semi_colon); 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott StringToLowerASCII(mime_type); 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (semi_colon != comma) { 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char kCharsetTag[] = "charset="; 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott it = std::search(semi_colon + 1, comma, kCharsetTag, 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott kCharsetTag + sizeof(kCharsetTag)-1); 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (it != comma) 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset->assign(it + sizeof(kCharsetTag)-1, comma); 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // fallback to defaults if nothing specified in the URL: 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (mime_type->empty()) 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott mime_type->assign("text/plain"); 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (charset->empty()) 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott charset->assign("US-ASCII"); 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 6321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen // The caller may not be interested in receiving the data. 6421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (!data) 6521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return true; 6621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Preserve spaces if dealing with text or xml input, same as mozilla: 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // https://bugzilla.mozilla.org/show_bug.cgi?id=138052 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // but strip them otherwise: 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // https://bugzilla.mozilla.org/show_bug.cgi?id=37200 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (Spaces in a data URL should be escaped, which is handled below, so any 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // spaces now are wrong. People expect to be able to enter them in the URL 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // bar for text, and it can't hurt, so we allow it.) 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string temp_data = std::string(comma + 1, end); 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For base64, we may have url-escaped whitespace which is not part 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // of the data, and should be stripped. Otherwise, the escaped whitespace 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // could be part of the payload, so don't strip it. 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (base64_encoded) { 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott temp_data = UnescapeURLComponent(temp_data, 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS | 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UnescapeRule::CONTROL_CHARS); 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Strip whitespace. 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 || 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott mime_type->find("xml") != std::string::npos)) { 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott temp_data.erase(std::remove_if(temp_data.begin(), temp_data.end(), 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott IsAsciiWhitespace<wchar_t>), 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott temp_data.end()); 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!base64_encoded) { 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott temp_data = UnescapeURLComponent(temp_data, 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS | 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UnescapeRule::CONTROL_CHARS); 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (base64_encoded) 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return base::Base64Decode(temp_data, data); 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott temp_data.swap(*data); 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return true; 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace net 107