// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5#ifndef NET_BASE_NET_UTIL_H_
6#define NET_BASE_NET_UTIL_H_
7
8#include "build/build_config.h"
9
10#ifdef OS_WIN
11#include <windows.h>
12#endif
13
14#include <string>
15#include <set>
16
17#include "base/basictypes.h"
18#include "base/string16.h"
19#include "net/base/escape.h"
20
21struct addrinfo;
22class FilePath;
23class GURL;
24
25namespace base {
26class Time;
27}
28
29namespace url_canon {
30struct CanonHostInfo;
31}
32
33namespace url_parse {
34struct Parsed;
35}
36
37namespace net {
38
39// Holds a list of ports that should be accepted despite bans.
40extern std::set<int> explicitly_allowed_ports;
41
42// Given the full path to a file name, creates a file: URL. The returned URL
43// may not be valid if the input is malformed.
44GURL FilePathToFileURL(const FilePath& path);
45
46// Converts a file: URL back to a filename that can be passed to the OS. The
47// file URL must be well-formed (GURL::is_valid() must return true); we don't
48// handle degenerate cases here. Returns true on success, false if it isn't a
49// valid file URL. On failure, *file_path will be empty.
50bool FileURLToFilePath(const GURL& url, FilePath* file_path);
51
52// Splits an input of the form <host>[":"<port>] into its consitituent parts.
53// Saves the result into |*host| and |*port|. If the input did not have
54// the optional port, sets |*port| to -1.
55// Returns true if the parsing was successful, false otherwise.
56// The returned host is NOT canonicalized, and may be invalid. If <host> is
57// an IPv6 literal address, the returned host includes the square brackets.
58bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
59                      std::string::const_iterator host_and_port_end,
60                      std::string* host,
61                      int* port);
62bool ParseHostAndPort(const std::string& host_and_port,
63                      std::string* host,
64                      int* port);
65
66// Returns a host:port string for the given URL.
67std::string GetHostAndPort(const GURL& url);
68
69// Returns a host[:port] string for the given URL, where the port is omitted
70// if it is the default for the URL's scheme.
71std::string GetHostAndOptionalPort(const GURL& url);
72
73// Returns the string representation of an address, like "192.168.0.1".
74// Returns empty string on failure.
75std::string NetAddressToString(const struct addrinfo* net_address);
76
77// Returns the hostname of the current system. Returns empty string on failure.
78std::string GetHostName();
79
80// Extracts the unescaped username/password from |url|, saving the results
81// into |*username| and |*password|.
82void GetIdentityFromURL(const GURL& url,
83                        std::wstring* username,
84                        std::wstring* password);
85
86// Return the value of the HTTP response header with name 'name'.  'headers'
87// should be in the format that URLRequest::GetResponseHeaders() returns.
88// Returns the empty string if the header is not found.
89std::wstring GetSpecificHeader(const std::wstring& headers,
90                               const std::wstring& name);
91std::string GetSpecificHeader(const std::string& headers,
92                              const std::string& name);
93
94// Return the value of the HTTP response header field's parameter named
95// 'param_name'.  Returns the empty string if the parameter is not found or is
96// improperly formatted.
97std::wstring GetHeaderParamValue(const std::wstring& field,
98                                 const std::wstring& param_name);
99std::string GetHeaderParamValue(const std::string& field,
100                                const std::string& param_name);
101
102// Return the filename extracted from Content-Disposition header. The following
103// formats are tried in order listed below:
104//
105// 1. RFC 2047
106// 2. Raw-8bit-characters :
107//    a. UTF-8, b. referrer_charset, c. default os codepage.
108// 3. %-escaped UTF-8.
109//
110// In step 2, if referrer_charset is empty(i.e. unknown), 2b is skipped.
111// In step 3, the fallback charsets tried in step 2 are not tried. We
112// can consider doing that later.
113//
114// When a param value is ASCII, but is not in format #1 or format #3 above,
115// it is returned as it is unless it's pretty close to two supported
116// formats but not well-formed. In that case, an empty string is returned.
117//
118// In any case, a caller must check for the empty return value and resort to
119// another means to get a filename (e.g. url).
120//
121// This function does not do any escaping and callers are responsible for
122// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
123//
124// TODO(jungshik): revisit this issue. At the moment, the only caller
125// net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters.  The
126// other caller is a unit test. Need to figure out expose this function only to
127// net_util_unittest.
128//
129std::string GetFileNameFromCD(const std::string& header,
130                              const std::string& referrer_charset);
131
132// Converts the given host name to unicode characters. This can be called for
133// any host name, if the input is not IDN or is invalid in some way, we'll just
134// return the ASCII source so it is still usable.
135//
136// The input should be the canonicalized ASCII host name from GURL. This
137// function does NOT accept UTF-8! Its length must also be given (this is
138// designed to work on the substring of the host out of a URL spec).
139//
140// |languages| is a comma separated list of ISO 639 language codes. It
141// is used to determine whether a hostname is 'comprehensible' to a user
142// who understands languages listed. |host| will be converted to a
143// human-readable form (Unicode) ONLY when each component of |host| is
144// regarded as 'comprehensible'. Scipt-mixing is not allowed except that
145// Latin letters in the ASCII range can be mixed with a limited set of
146// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
147// When |languages| is empty, even that mixing is not allowed.
148//
149// |offset_for_adjustment| is an offset into |host|, which will be adjusted to
150// point at the same logical place in the output string. If this isn't possible
151// because it points past the end of |host| or into the middle of a punycode
152// sequence, it will be set to std::wstring::npos.  |offset_for_adjustment| may
153// be NULL.
154std::wstring IDNToUnicode(const char* host,
155                          size_t host_len,
156                          const std::wstring& languages,
157                          size_t* offset_for_adjustment);
158
159// Canonicalizes |host| and returns it.  Also fills |host_info| with
160// IP address information.  |host_info| must not be NULL.
161std::string CanonicalizeHost(const std::string& host,
162                             url_canon::CanonHostInfo* host_info);
163std::string CanonicalizeHost(const std::wstring& host,
164                             url_canon::CanonHostInfo* host_info);
165
166// Returns true if |host| is not an IP address and is compliant with a set of
167// rules based on RFC 1738 and tweaked to be compatible with the real world.
168// The rules are:
169//   * One or more components separated by '.'
170//   * Each component begins and ends with an alphanumeric character
171//   * Each component contains only alphanumeric characters and '-' or '_'
172//   * The last component does not begin with a digit
173//   * Optional trailing dot after last component (means "treat as FQDN")
174//
175// NOTE: You should only pass in hosts that have been returned from
176// CanonicalizeHost(), or you may not get accurate results.
177bool IsCanonicalizedHostCompliant(const std::string& host);
178
179// Call these functions to get the html snippet for a directory listing.
180// The return values of both functions are in UTF-8.
181std::string GetDirectoryListingHeader(const string16& title);
182
183// Given the name of a file in a directory (ftp or local) and
184// other information (is_dir, size, modification time), it returns
185// the html snippet to add the entry for the file to the directory listing.
186// Currently, it's a script tag containing a call to a Javascript function
187// |addRow|.
188//
189// Its 1st parameter is derived from |name| and is the Javascript-string
190// escaped form of |name| (i.e \uXXXX). The 2nd parameter is the url-escaped
191// |raw_bytes| if it's not empty. If empty, the 2nd parameter is the
192// url-escaped |name| in UTF-8.
193std::string GetDirectoryListingEntry(const string16& name,
194                                     const std::string& raw_bytes,
195                                     bool is_dir, int64 size,
196                                     base::Time modified);
197
198// If text starts with "www." it is removed, otherwise text is returned
199// unmodified.
200std::wstring StripWWW(const std::wstring& text);
201
202// Gets the filename from the raw Content-Disposition header (as read from the
203// network).  Otherwise uses the last path component name or hostname from
204// |url|. If there is no filename or it can't be used, the given |default_name|,
205// will be used unless it is empty.
206
207// Note: it's possible for the suggested filename to be empty (e.g.,
208// file:///). referrer_charset is used as one of charsets
209// to interpret a raw 8bit string in C-D header (after interpreting
210// as UTF-8 fails). See the comment for GetFilenameFromCD for more details.
211FilePath GetSuggestedFilename(const GURL& url,
212                              const std::string& content_disposition,
213                              const std::string& referrer_charset,
214                              const FilePath& default_name);
215
216// Checks the given port against a list of ports which are restricted by
217// default.  Returns true if the port is allowed, false if it is restricted.
218bool IsPortAllowedByDefault(int port);
219
220// Checks the given port against a list of ports which are restricted by the
221// FTP protocol.  Returns true if the port is allowed, false if it is
222// restricted.
223bool IsPortAllowedByFtp(int port);
224
225// Check if banned |port| has been overriden by an entry in
226// |explicitly_allowed_ports_|.
227bool IsPortAllowedByOverride(int port);
228
229// Set socket to non-blocking mode
230int SetNonBlocking(int fd);
231
232// Appends the given part of the original URL to the output string formatted for
233// the user. The given parsed structure will be updated. The host name formatter
234// also takes the same accept languages component as ElideURL. |new_parsed| may
235// be null.
236void AppendFormattedHost(const GURL& url,
237                         const std::wstring& languages,
238                         std::wstring* output,
239                         url_parse::Parsed* new_parsed,
240                         size_t* offset_for_adjustment);
241
242// Creates a string representation of |url|. The IDN host name may be in Unicode
243// if |languages| accepts the Unicode representation. If
244// |omit_username_password| is true, any username and password are removed.
245// |unescape_rules| defines how to clean the URL for human readability.
246// You will generally want |UnescapeRule::SPACES| for display to the user if you
247// can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the
248// query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and
249// UTF-8.
250//
251// The last three parameters may be NULL.
252// |new_parsed| will be set to the parsing parameters of the resultant URL.
253// |prefix_end| will be the length before the hostname of the resultant URL.
254// |offset_for_adjustment| is an offset into the original |url|'s spec(), which
255// will be modified to reflect changes this function makes to the output string;
256// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is
257// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return
258// the output string will be "http://c.com/" and |offset_for_adjustment| will be
259// 8.  If the offset cannot be successfully adjusted (e.g. because it points
260// into the middle of a component that was entirely removed, past the end of the
261// string, or into the middle of an encoding sequence), it will be set to
262// std::wstring::npos.
263std::wstring FormatUrl(const GURL& url,
264                       const std::wstring& languages,
265                       bool omit_username_password,
266                       UnescapeRule::Type unescape_rules,
267                       url_parse::Parsed* new_parsed,
268                       size_t* prefix_end,
269                       size_t* offset_for_adjustment);
270
271// Creates a string representation of |url| for display to the user.
272// This is a shorthand of the above function with omit_username_password=true,
273// unescape=SPACES, new_parsed=NULL, and prefix_end=NULL.
274inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
275  return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL,
276                   NULL);
277}
278
279// Strip the portions of |url| that aren't core to the network request.
280//   - user name / password
281//   - reference section
282GURL SimplifyUrlForRequest(const GURL& url);
283
284void SetExplicitlyAllowedPorts(const std::wstring& allowed_ports);
285
286}  // namespace net
287
288#endif  // NET_BASE_NET_UTIL_H_
289