url_canon_mailtourl.cc revision 7d4cd473f85ac64c3747c96c277f9e506a0d2246
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Functions for canonicalizing "mailto:" URLs. 6 7#include "url/url_canon.h" 8#include "url/url_canon_internal.h" 9#include "url/url_file.h" 10#include "url/url_parse_internal.h" 11 12namespace url_canon { 13 14namespace { 15 16 17template<typename CHAR, typename UCHAR> 18bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, 19 const url_parse::Parsed& parsed, 20 CanonOutput* output, 21 url_parse::Parsed* new_parsed) { 22 23 // mailto: only uses {scheme, path, query} -- clear the rest. 24 new_parsed->username = url_parse::Component(); 25 new_parsed->password = url_parse::Component(); 26 new_parsed->host = url_parse::Component(); 27 new_parsed->port = url_parse::Component(); 28 new_parsed->ref = url_parse::Component(); 29 30 // Scheme (known, so we don't bother running it through the more 31 // complicated scheme canonicalizer). 32 new_parsed->scheme.begin = output->length(); 33 output->Append("mailto:", 7); 34 new_parsed->scheme.len = 6; 35 36 bool success = true; 37 38 // Path 39 if (parsed.path.is_valid()) { 40 new_parsed->path.begin = output->length(); 41 42 // Copy the path using path URL's more lax escaping rules. 43 // We convert to UTF-8 and escape non-ASCII, but leave all 44 // ASCII characters alone. 45 int end = parsed.path.end(); 46 for (int i = parsed.path.begin; i < end; ++i) { 47 UCHAR uch = static_cast<UCHAR>(source.path[i]); 48 if (uch < 0x20 || uch >= 0x80) 49 success &= AppendUTF8EscapedChar(source.path, &i, end, output); 50 else 51 output->push_back(static_cast<char>(uch)); 52 } 53 54 new_parsed->path.len = output->length() - new_parsed->path.begin; 55 } else { 56 // No path at all 57 new_parsed->path.reset(); 58 } 59 60 // Query -- always use the default utf8 charset converter. 61 CanonicalizeQuery(source.query, parsed.query, NULL, 62 output, &new_parsed->query); 63 64 return success; 65} 66 67} // namespace 68 69bool CanonicalizeMailtoURL(const char* spec, 70 int spec_len, 71 const url_parse::Parsed& parsed, 72 CanonOutput* output, 73 url_parse::Parsed* new_parsed) { 74 return DoCanonicalizeMailtoURL<char, unsigned char>( 75 URLComponentSource<char>(spec), parsed, output, new_parsed); 76} 77 78bool CanonicalizeMailtoURL(const base::char16* spec, 79 int spec_len, 80 const url_parse::Parsed& parsed, 81 CanonOutput* output, 82 url_parse::Parsed* new_parsed) { 83 return DoCanonicalizeMailtoURL<base::char16, base::char16>( 84 URLComponentSource<base::char16>(spec), parsed, output, new_parsed); 85} 86 87bool ReplaceMailtoURL(const char* base, 88 const url_parse::Parsed& base_parsed, 89 const Replacements<char>& replacements, 90 CanonOutput* output, 91 url_parse::Parsed* new_parsed) { 92 URLComponentSource<char> source(base); 93 url_parse::Parsed parsed(base_parsed); 94 SetupOverrideComponents(base, replacements, &source, &parsed); 95 return DoCanonicalizeMailtoURL<char, unsigned char>( 96 source, parsed, output, new_parsed); 97} 98 99bool ReplaceMailtoURL(const char* base, 100 const url_parse::Parsed& base_parsed, 101 const Replacements<base::char16>& replacements, 102 CanonOutput* output, 103 url_parse::Parsed* new_parsed) { 104 RawCanonOutput<1024> utf8; 105 URLComponentSource<char> source(base); 106 url_parse::Parsed parsed(base_parsed); 107 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 108 return DoCanonicalizeMailtoURL<char, unsigned char>( 109 source, parsed, output, new_parsed); 110} 111 112} // namespace url_canon 113