1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// found in the LICENSE file. 4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Functions for canonicalizing "mailto:" URLs. 6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_canon.h" 8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_canon_internal.h" 9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_file.h" 10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "url/url_parse_internal.h" 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 120529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochnamespace url { 13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace { 15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 160529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochtemplate <typename CHAR, typename UCHAR> 17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, 180529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& parsed, 19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 200529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // mailto: only uses {scheme, path, query} -- clear the rest. 220529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->username = Component(); 230529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->password = Component(); 240529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->host = Component(); 250529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->port = Component(); 260529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch new_parsed->ref = Component(); 27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Scheme (known, so we don't bother running it through the more 29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // complicated scheme canonicalizer). 30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->scheme.begin = output->length(); 31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->Append("mailto:", 7); 32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->scheme.len = 6; 33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool success = true; 35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Path 37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (parsed.path.is_valid()) { 38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->path.begin = output->length(); 39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Copy the path using path URL's more lax escaping rules. 41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // We convert to UTF-8 and escape non-ASCII, but leave all 42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // ASCII characters alone. 43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int end = parsed.path.end(); 44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = parsed.path.begin; i < end; ++i) { 45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) UCHAR uch = static_cast<UCHAR>(source.path[i]); 46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (uch < 0x20 || uch >= 0x80) 47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) success &= AppendUTF8EscapedChar(source.path, &i, end, output); 48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else 49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output->push_back(static_cast<char>(uch)); 50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 51c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->path.len = output->length() - new_parsed->path.begin; 53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // No path at all 55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_parsed->path.reset(); 56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Query -- always use the default utf8 charset converter. 59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonicalizeQuery(source.query, parsed.query, NULL, 60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) output, &new_parsed->query); 61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return success; 63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} // namespace 66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool CanonicalizeMailtoURL(const char* spec, 680529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch int spec_len, 690529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& parsed, 700529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch CanonOutput* output, 710529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoCanonicalizeMailtoURL<char, unsigned char>( 73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<char>(spec), parsed, output, new_parsed); 74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 767d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)bool CanonicalizeMailtoURL(const base::char16* spec, 77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int spec_len, 780529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& parsed, 79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 800529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 817d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) return DoCanonicalizeMailtoURL<base::char16, base::char16>( 827d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) URLComponentSource<base::char16>(spec), parsed, output, new_parsed); 83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ReplaceMailtoURL(const char* base, 860529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& base_parsed, 87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Replacements<char>& replacements, 88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 890529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<char> source(base); 910529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed parsed(base_parsed); 92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) SetupOverrideComponents(base, replacements, &source, &parsed); 93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoCanonicalizeMailtoURL<char, unsigned char>( 94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) source, parsed, output, new_parsed); 95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool ReplaceMailtoURL(const char* base, 980529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Parsed& base_parsed, 997d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const Replacements<base::char16>& replacements, 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) CanonOutput* output, 1010529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed* new_parsed) { 102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) RawCanonOutput<1024> utf8; 103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) URLComponentSource<char> source(base); 1040529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Parsed parsed(base_parsed); 105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return DoCanonicalizeMailtoURL<char, unsigned char>( 107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) source, parsed, output, new_parsed); 108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1100529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch} // namespace url 111