1// Copyright 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Functions for canonicalizing "mailto:" URLs.
31
32#include "googleurl/src/url_canon.h"
33#include "googleurl/src/url_canon_internal.h"
34#include "googleurl/src/url_file.h"
35#include "googleurl/src/url_parse_internal.h"
36
37namespace url_canon {
38
39namespace {
40
41
42template<typename CHAR, typename UCHAR>
43bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
44                             const url_parse::Parsed& parsed,
45                             CanonOutput* output,
46                             url_parse::Parsed* new_parsed) {
47
48  // mailto: only uses {scheme, path, query} -- clear the rest.
49  new_parsed->username = url_parse::Component();
50  new_parsed->password = url_parse::Component();
51  new_parsed->host = url_parse::Component();
52  new_parsed->port = url_parse::Component();
53  new_parsed->ref = url_parse::Component();
54
55  // Scheme (known, so we don't bother running it through the more
56  // complicated scheme canonicalizer).
57  new_parsed->scheme.begin = output->length();
58  output->Append("mailto:", 7);
59  new_parsed->scheme.len = 6;
60
61  bool success = true;
62
63  // Path
64  if (parsed.path.is_valid()) {
65    new_parsed->path.begin = output->length();
66
67    // Copy the path using path URL's more lax escaping rules.
68    // We convert to UTF-8 and escape non-ASCII, but leave all
69    // ASCII characters alone.
70    int end = parsed.path.end();
71    for (int i = parsed.path.begin; i < end; ++i) {
72      UCHAR uch = static_cast<UCHAR>(source.path[i]);
73      if (uch < 0x20 || uch >= 0x80)
74        success &= AppendUTF8EscapedChar(source.path, &i, end, output);
75      else
76        output->push_back(static_cast<char>(uch));
77    }
78
79    new_parsed->path.len = output->length() - new_parsed->path.begin;
80  } else {
81    // No path at all
82    new_parsed->path.reset();
83  }
84
85  // Query -- always use the default utf8 charset converter.
86  CanonicalizeQuery(source.query, parsed.query, NULL,
87                    output, &new_parsed->query);
88
89  return success;
90}
91
92} // namespace
93
94bool CanonicalizeMailtoURL(const char* spec,
95                          int spec_len,
96                          const url_parse::Parsed& parsed,
97                          CanonOutput* output,
98                          url_parse::Parsed* new_parsed) {
99  return DoCanonicalizeMailtoURL<char, unsigned char>(
100      URLComponentSource<char>(spec), parsed, output, new_parsed);
101}
102
103bool CanonicalizeMailtoURL(const char16* spec,
104                           int spec_len,
105                           const url_parse::Parsed& parsed,
106                           CanonOutput* output,
107                           url_parse::Parsed* new_parsed) {
108  return DoCanonicalizeMailtoURL<char16, char16>(
109      URLComponentSource<char16>(spec), parsed, output, new_parsed);
110}
111
112bool ReplaceMailtoURL(const char* base,
113                      const url_parse::Parsed& base_parsed,
114                      const Replacements<char>& replacements,
115                      CanonOutput* output,
116                      url_parse::Parsed* new_parsed) {
117  URLComponentSource<char> source(base);
118  url_parse::Parsed parsed(base_parsed);
119  SetupOverrideComponents(base, replacements, &source, &parsed);
120  return DoCanonicalizeMailtoURL<char, unsigned char>(
121      source, parsed, output, new_parsed);
122}
123
124bool ReplaceMailtoURL(const char* base,
125                      const url_parse::Parsed& base_parsed,
126                      const Replacements<char16>& replacements,
127                      CanonOutput* output,
128                      url_parse::Parsed* new_parsed) {
129  RawCanonOutput<1024> utf8;
130  URLComponentSource<char> source(base);
131  url_parse::Parsed parsed(base_parsed);
132  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
133  return DoCanonicalizeMailtoURL<char, unsigned char>(
134      source, parsed, output, new_parsed);
135}
136
137}  // namespace url_canon
138