1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// NOTE: based loosely on mozilla's nsDataChannel.cpp
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <algorithm>
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/data_url.h"
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/base64.h"
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_util.h"
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/gurl.h"
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/escape.h"
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace net {
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// static
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool DataURL::Parse(const GURL& url, std::string* mime_type,
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    std::string* charset, std::string* data) {
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string::const_iterator begin = url.spec().begin();
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string::const_iterator end = url.spec().end();
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string::const_iterator after_colon = std::find(begin, end, ':');
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (after_colon == end)
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ++after_colon;
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // first, find the start of the data
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string::const_iterator comma = std::find(after_colon, end, ',');
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (comma == end)
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return false;
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const char kBase64Tag[] = ";base64";
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string::const_iterator it =
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      std::search(after_colon, comma, kBase64Tag,
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                  kBase64Tag + sizeof(kBase64Tag)-1);
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool base64_encoded = (it != comma);
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (comma != after_colon) {
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // everything else is content type
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    std::string::const_iterator semi_colon = std::find(after_colon, comma, ';');
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (semi_colon != after_colon) {
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      mime_type->assign(after_colon, semi_colon);
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      StringToLowerASCII(mime_type);
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (semi_colon != comma) {
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      const char kCharsetTag[] = "charset=";
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      it = std::search(semi_colon + 1, comma, kCharsetTag,
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       kCharsetTag + sizeof(kCharsetTag)-1);
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      if (it != comma)
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        charset->assign(it + sizeof(kCharsetTag)-1, comma);
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // fallback to defaults if nothing specified in the URL:
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (mime_type->empty())
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    mime_type->assign("text/plain");
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (charset->empty())
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    charset->assign("US-ASCII");
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
6321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // The caller may not be interested in receiving the data.
6421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  if (!data)
6521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    return true;
6621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Preserve spaces if dealing with text or xml input, same as mozilla:
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   https://bugzilla.mozilla.org/show_bug.cgi?id=138052
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // but strip them otherwise:
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //   https://bugzilla.mozilla.org/show_bug.cgi?id=37200
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // (Spaces in a data URL should be escaped, which is handled below, so any
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // spaces now are wrong. People expect to be able to enter them in the URL
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // bar for text, and it can't hurt, so we allow it.)
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string temp_data = std::string(comma + 1, end);
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // For base64, we may have url-escaped whitespace which is not part
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // of the data, and should be stripped. Otherwise, the escaped whitespace
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // could be part of the payload, so don't strip it.
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (base64_encoded) {
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    temp_data = UnescapeURLComponent(temp_data,
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        UnescapeRule::CONTROL_CHARS);
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Strip whitespace.
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (base64_encoded || !(mime_type->compare(0, 5, "text/") == 0 ||
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          mime_type->find("xml") != std::string::npos)) {
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    temp_data.erase(std::remove_if(temp_data.begin(), temp_data.end(),
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                   IsAsciiWhitespace<wchar_t>),
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    temp_data.end());
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (!base64_encoded) {
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    temp_data = UnescapeURLComponent(temp_data,
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        UnescapeRule::CONTROL_CHARS);
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (base64_encoded)
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return base::Base64Decode(temp_data, data);
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  temp_data.swap(*data);
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return true;
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace net
107