1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef NET_BASE_NET_UTIL_H_
6#define NET_BASE_NET_UTIL_H_
7#pragma once
8
9#include "build/build_config.h"
10
11#if defined(OS_WIN)
12#include <windows.h>
13#include <ws2tcpip.h>
14#elif defined(OS_POSIX)
15#include <sys/socket.h>
16#endif
17
18#include <list>
19#include <string>
20#include <set>
21#include <vector>
22
23#include "base/basictypes.h"
24#include "base/string16.h"
25#include "net/base/escape.h"
26#include "net/base/net_export.h"
27
28struct addrinfo;
29class FilePath;
30class GURL;
31
32namespace base {
33class Time;
34}
35
36namespace url_canon {
37struct CanonHostInfo;
38}
39
40namespace url_parse {
41struct Parsed;
42}
43
44namespace net {
45
46// Used by FormatUrl to specify handling of certain parts of the url.
47typedef uint32 FormatUrlType;
48typedef uint32 FormatUrlTypes;
49
// Selects how GetHeaderParamValue treats quotes in a parameter value.
// This class only scopes the |Type| enum: its sole constructor is private, so
// code outside the class cannot create instances.
class QuoteRule {
 public:
  enum Type {
    KEEP_OUTER_QUOTES = 0,
    REMOVE_OUTER_QUOTES = 1
  };

 private:
  // Private on purpose: QuoteRule is not meant to be instantiated.
  QuoteRule();
};
61
// Nothing is omitted.
63extern const FormatUrlType kFormatUrlOmitNothing;
64
65// If set, any username and password are removed.
66extern const FormatUrlType kFormatUrlOmitUsernamePassword;
67
68// If the scheme is 'http://', it's removed.
69extern const FormatUrlType kFormatUrlOmitHTTP;
70
71// Omits the path if it is just a slash and there is no query or ref.  This is
72// meaningful for non-file "standard" URLs.
73extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
74
// Convenience for omitting all unnecessary types.
76extern const FormatUrlType kFormatUrlOmitAll;
77
78// Holds a list of ports that should be accepted despite bans.
79extern std::multiset<int> explicitly_allowed_ports;
80
81// Given the full path to a file name, creates a file: URL. The returned URL
82// may not be valid if the input is malformed.
83GURL FilePathToFileURL(const FilePath& path);
84
85// Converts a file: URL back to a filename that can be passed to the OS. The
86// file URL must be well-formed (GURL::is_valid() must return true); we don't
87// handle degenerate cases here. Returns true on success, false if it isn't a
88// valid file URL. On failure, *file_path will be empty.
89bool FileURLToFilePath(const GURL& url, FilePath* file_path);
90
// Splits an input of the form <host>[":"<port>] into its constituent parts.
92// Saves the result into |*host| and |*port|. If the input did not have
93// the optional port, sets |*port| to -1.
94// Returns true if the parsing was successful, false otherwise.
95// The returned host is NOT canonicalized, and may be invalid. If <host> is
96// an IPv6 literal address, the returned host includes the square brackets.
97bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
98                      std::string::const_iterator host_and_port_end,
99                      std::string* host,
100                      int* port);
101bool ParseHostAndPort(const std::string& host_and_port,
102                      std::string* host,
103                      int* port);
104
105// Returns a host:port string for the given URL.
106std::string GetHostAndPort(const GURL& url);
107
108// Returns a host[:port] string for the given URL, where the port is omitted
109// if it is the default for the URL's scheme.
110std::string GetHostAndOptionalPort(const GURL& url);
111
112// Returns the string representation of an address, like "192.168.0.1".
113// Returns empty string on failure.
114std::string NetAddressToString(const struct addrinfo* net_address);
115std::string NetAddressToString(const struct sockaddr* net_address,
116                               socklen_t address_len);
117
118// Same as NetAddressToString, but additionally includes the port number. For
119// example: "192.168.0.1:99" or "[::1]:80".
120std::string NetAddressToStringWithPort(const struct addrinfo* net_address);
121std::string NetAddressToStringWithPort(const struct sockaddr* net_address,
122                                       socklen_t address_len);
123
124// Returns the hostname of the current system. Returns empty string on failure.
125std::string GetHostName();
126
127// Extracts the unescaped username/password from |url|, saving the results
128// into |*username| and |*password|.
129void GetIdentityFromURL(const GURL& url,
130                        string16* username,
131                        string16* password);
132
133// Returns either the host from |url|, or, if the host is empty, the full spec.
134std::string GetHostOrSpecFromURL(const GURL& url);
135
136// Return the value of the HTTP response header with name 'name'.  'headers'
137// should be in the format that URLRequest::GetResponseHeaders() returns.
138// Returns the empty string if the header is not found.
139std::wstring GetSpecificHeader(const std::wstring& headers,
140                               const std::wstring& name);
141std::string GetSpecificHeader(const std::string& headers,
142                              const std::string& name);
143
144// Return the value of the HTTP response header field's parameter named
145// 'param_name'.  Returns the empty string if the parameter is not found or is
146// improperly formatted.
147std::wstring GetHeaderParamValue(const std::wstring& field,
148                                 const std::wstring& param_name,
149                                 QuoteRule::Type quote_rule);
150std::string GetHeaderParamValue(const std::string& field,
151                                const std::string& param_name,
152                                QuoteRule::Type quote_rule);
153
154// Return the filename extracted from Content-Disposition header. The following
155// formats are tried in order listed below:
156//
157// 1. RFC 5987
158// 2. RFC 2047
159// 3. Raw-8bit-characters :
160//    a. UTF-8, b. referrer_charset, c. default os codepage.
161// 4. %-escaped UTF-8.
162//
163// In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped.
164// In step 4, the fallback charsets tried in step 3 are not tried. We
165// can consider doing that later.
166//
167// When a param value is ASCII, but is not in format #2 or format #4 above,
168// it is returned as it is unless it's pretty close to two supported
169// formats but not well-formed. In that case, an empty string is returned.
170//
171// In any case, a caller must check for the empty return value and resort to
172// another means to get a filename (e.g. url).
173//
174// This function does not do any escaping and callers are responsible for
175// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
176//
// TODO(jungshik): revisit this issue. At the moment, the only caller is
// net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters.  The
// other caller is a unit test. Need to figure out how to expose this function
// only to net_util_unittest.
181//
182std::string GetFileNameFromCD(const std::string& header,
183                              const std::string& referrer_charset);
184
185// Converts the given host name to unicode characters. This can be called for
186// any host name, if the input is not IDN or is invalid in some way, we'll just
187// return the ASCII source so it is still usable.
188//
189// The input should be the canonicalized ASCII host name from GURL. This
190// function does NOT accept UTF-8! Its length must also be given (this is
191// designed to work on the substring of the host out of a URL spec).
192//
193// |languages| is a comma separated list of ISO 639 language codes. It
194// is used to determine whether a hostname is 'comprehensible' to a user
195// who understands languages listed. |host| will be converted to a
196// human-readable form (Unicode) ONLY when each component of |host| is
// regarded as 'comprehensible'. Script-mixing is not allowed except that
198// Latin letters in the ASCII range can be mixed with a limited set of
199// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
200// When |languages| is empty, even that mixing is not allowed.
201//
202// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
203// |url|'s spec(); each offset will be adjusted to point at the same logical
204// place in the result strings during decoding.  If this isn't possible because
205// an offset points past the end of |host| or into the middle of a punycode
206// sequence, the offending offset will be set to std::wstring::npos.
207// |offset[s]_for_adjustment| may be NULL.
208NET_EXPORT std::wstring IDNToUnicode(const char* host,
209                          size_t host_len,
210                          const std::wstring& languages,
211                          size_t* offset_for_adjustment);
212std::wstring IDNToUnicodeWithOffsets(
213    const char* host,
214    size_t host_len,
215    const std::wstring& languages,
216    std::vector<size_t>* offsets_for_adjustment);
217
218// Canonicalizes |host| and returns it.  Also fills |host_info| with
219// IP address information.  |host_info| must not be NULL.
220std::string CanonicalizeHost(const std::string& host,
221                             url_canon::CanonHostInfo* host_info);
222std::string CanonicalizeHost(const std::wstring& host,
223                             url_canon::CanonHostInfo* host_info);
224
225// Returns true if |host| is not an IP address and is compliant with a set of
226// rules based on RFC 1738 and tweaked to be compatible with the real world.
227// The rules are:
228//   * One or more components separated by '.'
229//   * Each component begins and ends with an alphanumeric character
230//   * Each component contains only alphanumeric characters and '-' or '_'
231//   * The last component does not begin with a digit
232//   * Optional trailing dot after last component (means "treat as FQDN")
233// If |desired_tld| is non-NULL, the host will only be considered invalid if
234// appending it as a trailing component still results in an invalid host.  This
235// helps us avoid marking as "invalid" user attempts to open "www.401k.com" by
236// typing 4-0-1-k-<ctrl>+<enter>.
237//
238// NOTE: You should only pass in hosts that have been returned from
239// CanonicalizeHost(), or you may not get accurate results.
240bool IsCanonicalizedHostCompliant(const std::string& host,
241                                  const std::string& desired_tld);
242
243// Call these functions to get the html snippet for a directory listing.
244// The return values of both functions are in UTF-8.
245std::string GetDirectoryListingHeader(const string16& title);
246
247// Given the name of a file in a directory (ftp or local) and
248// other information (is_dir, size, modification time), it returns
249// the html snippet to add the entry for the file to the directory listing.
250// Currently, it's a script tag containing a call to a Javascript function
251// |addRow|.
252//
253// |name| is the file name to be displayed. |raw_bytes| will be used
254// as the actual target of the link (so for example, ftp links should use
255// server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name|
256// will be used.
257//
258// Both |name| and |raw_bytes| are escaped internally.
259std::string GetDirectoryListingEntry(const string16& name,
260                                     const std::string& raw_bytes,
261                                     bool is_dir, int64 size,
262                                     base::Time modified);
263
264// If text starts with "www." it is removed, otherwise text is returned
265// unmodified.
266string16 StripWWW(const string16& text);
267
// Gets the filename from the raw Content-Disposition header (as read from the
// network).  Otherwise uses the last path component name or hostname from
// |url|. If there is no filename or it can't be used, the given |default_name|
// will be used unless it is empty.
//
// Note: it's possible for the suggested filename to be empty (e.g.,
// file:///). referrer_charset is used as one of the charsets
// to interpret a raw 8bit string in C-D header (after interpreting
// as UTF-8 fails). See the comment for GetFileNameFromCD for more details.
277string16 GetSuggestedFilename(const GURL& url,
278                              const std::string& content_disposition,
279                              const std::string& referrer_charset,
280                              const string16& default_name);
281
282// Checks the given port against a list of ports which are restricted by
283// default.  Returns true if the port is allowed, false if it is restricted.
284bool IsPortAllowedByDefault(int port);
285
286// Checks the given port against a list of ports which are restricted by the
287// FTP protocol.  Returns true if the port is allowed, false if it is
288// restricted.
289bool IsPortAllowedByFtp(int port);
290
// Check if banned |port| has been overridden by an entry in
// |explicitly_allowed_ports|.
293bool IsPortAllowedByOverride(int port);
294
295// Set socket to non-blocking mode
296int SetNonBlocking(int fd);
297
298// Appends the given part of the original URL to the output string formatted for
299// the user. The given parsed structure will be updated. The host name formatter
300// also takes the same accept languages component as ElideURL. |new_parsed| may
301// be null.
302//
303// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
304// |url|'s spec(); each offset will be adjusted to point at the same logical
305// place in the result strings after reformatting of the host.  If this isn't
306// possible because an offset points past the end of the host or into the middle
307// of a multi-character sequence, the offending offset will be set to
308// std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
309void AppendFormattedHost(const GURL& url,
310                         const std::wstring& languages,
311                         std::wstring* output,
312                         url_parse::Parsed* new_parsed,
313                         size_t* offset_for_adjustment);
314void AppendFormattedHostWithOffsets(
315    const GURL& url,
316    const std::wstring& languages,
317    std::wstring* output,
318    url_parse::Parsed* new_parsed,
319    std::vector<size_t>* offsets_for_adjustment);
320
// Creates a string representation of |url|. The IDN host name may be in Unicode
// if |languages| accepts the Unicode representation. |format_types| is a
// bitmask of FormatUrlTypes, see it for details. |unescape_rules| defines how
// to clean the URL for human readability. You will generally want
// |UnescapeRule::SPACES| for display to the user if you can handle spaces, or
// |UnescapeRule::NORMAL| if not. If the path part and the query part seem to
// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
328//
329// The last three parameters may be NULL.
330// |new_parsed| will be set to the parsing parameters of the resultant URL.
331// |prefix_end| will be the length before the hostname of the resultant URL.
332//
// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
// |url|'s spec(); each offset will be modified to reflect changes this function
// makes to the output string. For example, if |url| is "http://a:b@c.com/",
// |format_types| includes kFormatUrlOmitUsernamePassword, and an offset is 12
// (the offset of '.'), then on return the output string will be
// "http://c.com/" and the offset will be 8.  If an offset cannot be
// successfully adjusted (e.g. because it points into the middle of a component
// that was entirely removed, past the end of the string, or into the middle of
// an encoding sequence), it will be set to string16::npos.
342string16 FormatUrl(const GURL& url,
343                   const std::string& languages,
344                   FormatUrlTypes format_types,
345                   UnescapeRule::Type unescape_rules,
346                   url_parse::Parsed* new_parsed,
347                   size_t* prefix_end,
348                   size_t* offset_for_adjustment);
349string16 FormatUrlWithOffsets(const GURL& url,
350                              const std::string& languages,
351                              FormatUrlTypes format_types,
352                              UnescapeRule::Type unescape_rules,
353                              url_parse::Parsed* new_parsed,
354                              size_t* prefix_end,
355                              std::vector<size_t>* offsets_for_adjustment);
356
357// This is a convenience function for FormatUrl() with
358// format_types = kFormatUrlOmitAll and unescape = SPACES.  This is the typical
359// set of flags for "URLs to display to the user".  You should be cautious about
360// using this for URLs which will be parsed or sent to other applications.
361inline string16 FormatUrl(const GURL& url, const std::string& languages) {
362  return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
363                   NULL, NULL, NULL);
364}
365
366// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
367// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
368bool CanStripTrailingSlash(const GURL& url);
369
370// Strip the portions of |url| that aren't core to the network request.
371//   - user name / password
372//   - reference section
373GURL SimplifyUrlForRequest(const GURL& url);
374
375void SetExplicitlyAllowedPorts(const std::string& allowed_ports);
376
377class ScopedPortException {
378 public:
379  ScopedPortException(int port);
380  ~ScopedPortException();
381
382 private:
383  int port_;
384
385  DISALLOW_COPY_AND_ASSIGN(ScopedPortException);
386};
387
388// Perform a simplistic test to see if IPv6 is supported by trying to create an
389// IPv6 socket.
390// TODO(jar): Make test more in-depth as needed.
391bool IPv6Supported();
392
393// Returns true if it can determine that only loopback addresses are configured.
394// i.e. if only 127.0.0.1 and ::1 are routable.
395bool HaveOnlyLoopbackAddresses();
396
397// IPAddressNumber is used to represent an IP address's numeric value as an
398// array of bytes, from most significant to least significant. This is the
399// network byte ordering.
400//
401// IPv4 addresses will have length 4, whereas IPv6 address will have length 16.
402typedef std::vector<unsigned char> IPAddressNumber;
403
404static const size_t kIPv4AddressSize = 4;
405static const size_t kIPv6AddressSize = 16;
406
407// Parses an IP address literal (either IPv4 or IPv6) to its numeric value.
408// Returns true on success and fills |ip_number| with the numeric value.
409bool ParseIPLiteralToNumber(const std::string& ip_literal,
410                            IPAddressNumber* ip_number);
411
412// Converts an IPv4 address to an IPv4-mapped IPv6 address.
413// For example 192.168.0.1 would be converted to ::ffff:192.168.0.1.
414IPAddressNumber ConvertIPv4NumberToIPv6Number(
415    const IPAddressNumber& ipv4_number);
416
417// Parses an IP block specifier from CIDR notation to an
418// (IP address, prefix length) pair. Returns true on success and fills
419// |*ip_number| with the numeric value of the IP address and sets
420// |*prefix_length_in_bits| with the length of the prefix.
421//
422// CIDR notation literals can use either IPv4 or IPv6 literals. Some examples:
423//
424//    10.10.3.1/20
425//    a:b:c::/46
426//    ::1/128
427bool ParseCIDRBlock(const std::string& cidr_literal,
428                    IPAddressNumber* ip_number,
429                    size_t* prefix_length_in_bits);
430
431// Compares an IP address to see if it falls within the specified IP block.
432// Returns true if it does, false otherwise.
433//
434// The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any
435// IP address whose |prefix_length_in_bits| most significant bits match
436// |ip_prefix| will be matched.
437//
438// In cases when an IPv4 address is being compared to an IPv6 address prefix
439// and vice versa, the IPv4 addresses will be converted to IPv4-mapped
440// (IPv6) addresses.
441bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
442                           const IPAddressNumber& ip_prefix,
443                           size_t prefix_length_in_bits);
444
445// Makes a copy of |info|. The dynamically-allocated parts are copied as well.
446// If |recursive| is true, chained entries via ai_next are copied too.
447// The copy returned by this function should be freed using
448// FreeCopyOfAddrinfo(), and NOT freeaddrinfo().
449struct addrinfo* CreateCopyOfAddrinfo(const struct addrinfo* info,
450                                      bool recursive);
451
452// Frees an addrinfo that was created by CreateCopyOfAddrinfo().
453void FreeCopyOfAddrinfo(struct addrinfo* info);
454
455// Returns the port field of the sockaddr in |info|.
456const uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info);
457uint16* GetPortFieldFromAddrinfo(struct addrinfo* info);
458
// Returns the value of |info|'s port (in host byte ordering).
460int GetPortFromAddrinfo(const struct addrinfo* info);
461
462// Same except for struct sockaddr.
463const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address,
464                                       socklen_t address_len);
465int GetPortFromSockaddr(const struct sockaddr* address,
466                        socklen_t address_len);
467
468// Returns true if |host| is one of the names (e.g. "localhost") or IP
469// addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback.
470//
471// Note that this function does not check for IP addresses other than
472// the above, although other IP addresses may point to the local
473// machine.
474bool IsLocalhost(const std::string& host);
475
476// struct that is used by GetNetworkList() to represent a network
477// interface.
478struct NetworkInterface {
479  NetworkInterface();
480  NetworkInterface(const std::string& name, const IPAddressNumber& address);
481  ~NetworkInterface();
482
483  std::string name;
484  IPAddressNumber address;
485};
486
487typedef std::list<NetworkInterface> NetworkInterfaceList;
488
489// Returns list of network interfaces except loopback interface. If an
490// interface has more than one address, a separate entry is added to
491// the list for each address.
492// Can be called only on a thread that allows IO.
493bool GetNetworkList(NetworkInterfaceList* networks);
494
// Offset-adjustment functor intended for use with std::transform.  Applied to
// an offset, it yields npos when the offset occurs at or before
// |component_start| and leaves the offset unaltered otherwise.  It is an
// implementation detail that is exposed in this header only so that unit tests
// can reach it.
struct ClampComponentOffset {
  // Explicit: a size_t must not silently convert into a functor.
  explicit ClampComponentOffset(size_t component_start);

  // Returns the adjusted value for |offset| as described above.
  size_t operator()(size_t offset);

  // Reference position that offsets are compared against; fixed at
  // construction.
  const size_t component_start;
};
504
505}  // namespace net
506
507#endif  // NET_BASE_NET_UTIL_H_
508