1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Functions for canonicalizing "mailto:" URLs.
6
7#include "url/url_canon.h"
8#include "url/url_canon_internal.h"
9#include "url/url_file.h"
10#include "url/url_parse_internal.h"
11
12namespace url_canon {
13
14namespace {
15
16
17template<typename CHAR, typename UCHAR>
18bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
19                             const url_parse::Parsed& parsed,
20                             CanonOutput* output,
21                             url_parse::Parsed* new_parsed) {
22
23  // mailto: only uses {scheme, path, query} -- clear the rest.
24  new_parsed->username = url_parse::Component();
25  new_parsed->password = url_parse::Component();
26  new_parsed->host = url_parse::Component();
27  new_parsed->port = url_parse::Component();
28  new_parsed->ref = url_parse::Component();
29
30  // Scheme (known, so we don't bother running it through the more
31  // complicated scheme canonicalizer).
32  new_parsed->scheme.begin = output->length();
33  output->Append("mailto:", 7);
34  new_parsed->scheme.len = 6;
35
36  bool success = true;
37
38  // Path
39  if (parsed.path.is_valid()) {
40    new_parsed->path.begin = output->length();
41
42    // Copy the path using path URL's more lax escaping rules.
43    // We convert to UTF-8 and escape non-ASCII, but leave all
44    // ASCII characters alone.
45    int end = parsed.path.end();
46    for (int i = parsed.path.begin; i < end; ++i) {
47      UCHAR uch = static_cast<UCHAR>(source.path[i]);
48      if (uch < 0x20 || uch >= 0x80)
49        success &= AppendUTF8EscapedChar(source.path, &i, end, output);
50      else
51        output->push_back(static_cast<char>(uch));
52    }
53
54    new_parsed->path.len = output->length() - new_parsed->path.begin;
55  } else {
56    // No path at all
57    new_parsed->path.reset();
58  }
59
60  // Query -- always use the default utf8 charset converter.
61  CanonicalizeQuery(source.query, parsed.query, NULL,
62                    output, &new_parsed->query);
63
64  return success;
65}
66
67} // namespace
68
69bool CanonicalizeMailtoURL(const char* spec,
70                          int spec_len,
71                          const url_parse::Parsed& parsed,
72                          CanonOutput* output,
73                          url_parse::Parsed* new_parsed) {
74  return DoCanonicalizeMailtoURL<char, unsigned char>(
75      URLComponentSource<char>(spec), parsed, output, new_parsed);
76}
77
78bool CanonicalizeMailtoURL(const base::char16* spec,
79                           int spec_len,
80                           const url_parse::Parsed& parsed,
81                           CanonOutput* output,
82                           url_parse::Parsed* new_parsed) {
83  return DoCanonicalizeMailtoURL<base::char16, base::char16>(
84      URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
85}
86
87bool ReplaceMailtoURL(const char* base,
88                      const url_parse::Parsed& base_parsed,
89                      const Replacements<char>& replacements,
90                      CanonOutput* output,
91                      url_parse::Parsed* new_parsed) {
92  URLComponentSource<char> source(base);
93  url_parse::Parsed parsed(base_parsed);
94  SetupOverrideComponents(base, replacements, &source, &parsed);
95  return DoCanonicalizeMailtoURL<char, unsigned char>(
96      source, parsed, output, new_parsed);
97}
98
99bool ReplaceMailtoURL(const char* base,
100                      const url_parse::Parsed& base_parsed,
101                      const Replacements<base::char16>& replacements,
102                      CanonOutput* output,
103                      url_parse::Parsed* new_parsed) {
104  RawCanonOutput<1024> utf8;
105  URLComponentSource<char> source(base);
106  url_parse::Parsed parsed(base_parsed);
107  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
108  return DoCanonicalizeMailtoURL<char, unsigned char>(
109      source, parsed, output, new_parsed);
110}
111
112}  // namespace url_canon
113