1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef NET_BASE_NET_UTIL_H_ 6#define NET_BASE_NET_UTIL_H_ 7#pragma once 8 9#include "build/build_config.h" 10 11#if defined(OS_WIN) 12#include <windows.h> 13#include <ws2tcpip.h> 14#elif defined(OS_POSIX) 15#include <sys/socket.h> 16#endif 17 18#include <list> 19#include <string> 20#include <set> 21#include <vector> 22 23#include "base/basictypes.h" 24#include "base/string16.h" 25#include "net/base/escape.h" 26#include "net/base/net_export.h" 27 28struct addrinfo; 29class FilePath; 30class GURL; 31 32namespace base { 33class Time; 34} 35 36namespace url_canon { 37struct CanonHostInfo; 38} 39 40namespace url_parse { 41struct Parsed; 42} 43 44namespace net { 45 46// Used by FormatUrl to specify handling of certain parts of the url. 47typedef uint32 FormatUrlType; 48typedef uint32 FormatUrlTypes; 49 50// Used by GetHeaderParamValue to determine how to handle quotes in the value. 51class QuoteRule { 52 public: 53 enum Type { 54 KEEP_OUTER_QUOTES, 55 REMOVE_OUTER_QUOTES, 56 }; 57 58 private: 59 QuoteRule(); 60}; 61 62// Nothing is ommitted. 63extern const FormatUrlType kFormatUrlOmitNothing; 64 65// If set, any username and password are removed. 66extern const FormatUrlType kFormatUrlOmitUsernamePassword; 67 68// If the scheme is 'http://', it's removed. 69extern const FormatUrlType kFormatUrlOmitHTTP; 70 71// Omits the path if it is just a slash and there is no query or ref. This is 72// meaningful for non-file "standard" URLs. 73extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname; 74 75// Convenience for omitting all unecessary types. 76extern const FormatUrlType kFormatUrlOmitAll; 77 78// Holds a list of ports that should be accepted despite bans. 79extern std::multiset<int> explicitly_allowed_ports; 80 81// Given the full path to a file name, creates a file: URL. The returned URL 82// may not be valid if the input is malformed. 83GURL FilePathToFileURL(const FilePath& path); 84 85// Converts a file: URL back to a filename that can be passed to the OS. The 86// file URL must be well-formed (GURL::is_valid() must return true); we don't 87// handle degenerate cases here. Returns true on success, false if it isn't a 88// valid file URL. On failure, *file_path will be empty. 89bool FileURLToFilePath(const GURL& url, FilePath* file_path); 90 91// Splits an input of the form <host>[":"<port>] into its consitituent parts. 92// Saves the result into |*host| and |*port|. If the input did not have 93// the optional port, sets |*port| to -1. 94// Returns true if the parsing was successful, false otherwise. 95// The returned host is NOT canonicalized, and may be invalid. If <host> is 96// an IPv6 literal address, the returned host includes the square brackets. 97bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 98 std::string::const_iterator host_and_port_end, 99 std::string* host, 100 int* port); 101bool ParseHostAndPort(const std::string& host_and_port, 102 std::string* host, 103 int* port); 104 105// Returns a host:port string for the given URL. 106std::string GetHostAndPort(const GURL& url); 107 108// Returns a host[:port] string for the given URL, where the port is omitted 109// if it is the default for the URL's scheme. 110std::string GetHostAndOptionalPort(const GURL& url); 111 112// Returns the string representation of an address, like "192.168.0.1". 113// Returns empty string on failure. 114std::string NetAddressToString(const struct addrinfo* net_address); 115std::string NetAddressToString(const struct sockaddr* net_address, 116 socklen_t address_len); 117 118// Same as NetAddressToString, but additionally includes the port number. For 119// example: "192.168.0.1:99" or "[::1]:80". 120std::string NetAddressToStringWithPort(const struct addrinfo* net_address); 121std::string NetAddressToStringWithPort(const struct sockaddr* net_address, 122 socklen_t address_len); 123 124// Returns the hostname of the current system. Returns empty string on failure. 125std::string GetHostName(); 126 127// Extracts the unescaped username/password from |url|, saving the results 128// into |*username| and |*password|. 129void GetIdentityFromURL(const GURL& url, 130 string16* username, 131 string16* password); 132 133// Returns either the host from |url|, or, if the host is empty, the full spec. 134std::string GetHostOrSpecFromURL(const GURL& url); 135 136// Return the value of the HTTP response header with name 'name'. 'headers' 137// should be in the format that URLRequest::GetResponseHeaders() returns. 138// Returns the empty string if the header is not found. 139std::wstring GetSpecificHeader(const std::wstring& headers, 140 const std::wstring& name); 141std::string GetSpecificHeader(const std::string& headers, 142 const std::string& name); 143 144// Return the value of the HTTP response header field's parameter named 145// 'param_name'. Returns the empty string if the parameter is not found or is 146// improperly formatted. 147std::wstring GetHeaderParamValue(const std::wstring& field, 148 const std::wstring& param_name, 149 QuoteRule::Type quote_rule); 150std::string GetHeaderParamValue(const std::string& field, 151 const std::string& param_name, 152 QuoteRule::Type quote_rule); 153 154// Return the filename extracted from Content-Disposition header. The following 155// formats are tried in order listed below: 156// 157// 1. RFC 5987 158// 2. RFC 2047 159// 3. Raw-8bit-characters : 160// a. UTF-8, b. referrer_charset, c. default os codepage. 161// 4. %-escaped UTF-8. 162// 163// In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped. 164// In step 4, the fallback charsets tried in step 3 are not tried. We 165// can consider doing that later. 166// 167// When a param value is ASCII, but is not in format #2 or format #4 above, 168// it is returned as it is unless it's pretty close to two supported 169// formats but not well-formed. In that case, an empty string is returned. 170// 171// In any case, a caller must check for the empty return value and resort to 172// another means to get a filename (e.g. url). 173// 174// This function does not do any escaping and callers are responsible for 175// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit. 176// 177// TODO(jungshik): revisit this issue. At the moment, the only caller 178// net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters. The 179// other caller is a unit test. Need to figure out expose this function only to 180// net_util_unittest. 181// 182std::string GetFileNameFromCD(const std::string& header, 183 const std::string& referrer_charset); 184 185// Converts the given host name to unicode characters. This can be called for 186// any host name, if the input is not IDN or is invalid in some way, we'll just 187// return the ASCII source so it is still usable. 188// 189// The input should be the canonicalized ASCII host name from GURL. This 190// function does NOT accept UTF-8! Its length must also be given (this is 191// designed to work on the substring of the host out of a URL spec). 192// 193// |languages| is a comma separated list of ISO 639 language codes. It 194// is used to determine whether a hostname is 'comprehensible' to a user 195// who understands languages listed. |host| will be converted to a 196// human-readable form (Unicode) ONLY when each component of |host| is 197// regarded as 'comprehensible'. Scipt-mixing is not allowed except that 198// Latin letters in the ASCII range can be mixed with a limited set of 199// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). 200// When |languages| is empty, even that mixing is not allowed. 201// 202// (|offset[s]_for_adjustment|) specifies one or more offsets into the original 203// |url|'s spec(); each offset will be adjusted to point at the same logical 204// place in the result strings during decoding. If this isn't possible because 205// an offset points past the end of |host| or into the middle of a punycode 206// sequence, the offending offset will be set to std::wstring::npos. 207// |offset[s]_for_adjustment| may be NULL. 208NET_EXPORT std::wstring IDNToUnicode(const char* host, 209 size_t host_len, 210 const std::wstring& languages, 211 size_t* offset_for_adjustment); 212std::wstring IDNToUnicodeWithOffsets( 213 const char* host, 214 size_t host_len, 215 const std::wstring& languages, 216 std::vector<size_t>* offsets_for_adjustment); 217 218// Canonicalizes |host| and returns it. Also fills |host_info| with 219// IP address information. |host_info| must not be NULL. 220std::string CanonicalizeHost(const std::string& host, 221 url_canon::CanonHostInfo* host_info); 222std::string CanonicalizeHost(const std::wstring& host, 223 url_canon::CanonHostInfo* host_info); 224 225// Returns true if |host| is not an IP address and is compliant with a set of 226// rules based on RFC 1738 and tweaked to be compatible with the real world. 227// The rules are: 228// * One or more components separated by '.' 229// * Each component begins and ends with an alphanumeric character 230// * Each component contains only alphanumeric characters and '-' or '_' 231// * The last component does not begin with a digit 232// * Optional trailing dot after last component (means "treat as FQDN") 233// If |desired_tld| is non-NULL, the host will only be considered invalid if 234// appending it as a trailing component still results in an invalid host. This 235// helps us avoid marking as "invalid" user attempts to open "www.401k.com" by 236// typing 4-0-1-k-<ctrl>+<enter>. 237// 238// NOTE: You should only pass in hosts that have been returned from 239// CanonicalizeHost(), or you may not get accurate results. 240bool IsCanonicalizedHostCompliant(const std::string& host, 241 const std::string& desired_tld); 242 243// Call these functions to get the html snippet for a directory listing. 244// The return values of both functions are in UTF-8. 245std::string GetDirectoryListingHeader(const string16& title); 246 247// Given the name of a file in a directory (ftp or local) and 248// other information (is_dir, size, modification time), it returns 249// the html snippet to add the entry for the file to the directory listing. 250// Currently, it's a script tag containing a call to a Javascript function 251// |addRow|. 252// 253// |name| is the file name to be displayed. |raw_bytes| will be used 254// as the actual target of the link (so for example, ftp links should use 255// server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name| 256// will be used. 257// 258// Both |name| and |raw_bytes| are escaped internally. 259std::string GetDirectoryListingEntry(const string16& name, 260 const std::string& raw_bytes, 261 bool is_dir, int64 size, 262 base::Time modified); 263 264// If text starts with "www." it is removed, otherwise text is returned 265// unmodified. 266string16 StripWWW(const string16& text); 267 268// Gets the filename from the raw Content-Disposition header (as read from the 269// network). Otherwise uses the last path component name or hostname from 270// |url|. If there is no filename or it can't be used, the given |default_name|, 271// will be used unless it is empty. 272 273// Note: it's possible for the suggested filename to be empty (e.g., 274// file:///). referrer_charset is used as one of charsets 275// to interpret a raw 8bit string in C-D header (after interpreting 276// as UTF-8 fails). See the comment for GetFilenameFromCD for more details. 277string16 GetSuggestedFilename(const GURL& url, 278 const std::string& content_disposition, 279 const std::string& referrer_charset, 280 const string16& default_name); 281 282// Checks the given port against a list of ports which are restricted by 283// default. Returns true if the port is allowed, false if it is restricted. 284bool IsPortAllowedByDefault(int port); 285 286// Checks the given port against a list of ports which are restricted by the 287// FTP protocol. Returns true if the port is allowed, false if it is 288// restricted. 289bool IsPortAllowedByFtp(int port); 290 291// Check if banned |port| has been overriden by an entry in 292// |explicitly_allowed_ports_|. 293bool IsPortAllowedByOverride(int port); 294 295// Set socket to non-blocking mode 296int SetNonBlocking(int fd); 297 298// Appends the given part of the original URL to the output string formatted for 299// the user. The given parsed structure will be updated. The host name formatter 300// also takes the same accept languages component as ElideURL. |new_parsed| may 301// be null. 302// 303// (|offset[s]_for_adjustment|) specifies one or more offsets into the original 304// |url|'s spec(); each offset will be adjusted to point at the same logical 305// place in the result strings after reformatting of the host. If this isn't 306// possible because an offset points past the end of the host or into the middle 307// of a multi-character sequence, the offending offset will be set to 308// std::wstring::npos. |offset[s]_for_adjustment| may be NULL. 309void AppendFormattedHost(const GURL& url, 310 const std::wstring& languages, 311 std::wstring* output, 312 url_parse::Parsed* new_parsed, 313 size_t* offset_for_adjustment); 314void AppendFormattedHostWithOffsets( 315 const GURL& url, 316 const std::wstring& languages, 317 std::wstring* output, 318 url_parse::Parsed* new_parsed, 319 std::vector<size_t>* offsets_for_adjustment); 320 321// Creates a string representation of |url|. The IDN host name may be in Unicode 322// if |languages| accepts the Unicode representation. |format_type| is a bitmask 323// of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean 324// the URL for human readability. You will generally want |UnescapeRule::SPACES| 325// for display to the user if you can handle spaces, or |UnescapeRule::NORMAL| 326// if not. If the path part and the query part seem to be encoded in %-encoded 327// UTF-8, decodes %-encoding and UTF-8. 328// 329// The last three parameters may be NULL. 330// |new_parsed| will be set to the parsing parameters of the resultant URL. 331// |prefix_end| will be the length before the hostname of the resultant URL. 332// 333// (|offset[s]_for_adjustment|) specifies one or more offsets into the original 334// |url|'s spec(); each offset will be modified to reflect changes this function 335// makes to the output string. For example, if |url| is "http://a:b@c.com/", 336// |omit_username_password| is true, and an offset is 12 (the offset of '.'), 337// then on return the output string will be "http://c.com/" and the offset will 338// be 8. If an offset cannot be successfully adjusted (e.g. because it points 339// into the middle of a component that was entirely removed, past the end of the 340// string, or into the middle of an encoding sequence), it will be set to 341// string16::npos. 342string16 FormatUrl(const GURL& url, 343 const std::string& languages, 344 FormatUrlTypes format_types, 345 UnescapeRule::Type unescape_rules, 346 url_parse::Parsed* new_parsed, 347 size_t* prefix_end, 348 size_t* offset_for_adjustment); 349string16 FormatUrlWithOffsets(const GURL& url, 350 const std::string& languages, 351 FormatUrlTypes format_types, 352 UnescapeRule::Type unescape_rules, 353 url_parse::Parsed* new_parsed, 354 size_t* prefix_end, 355 std::vector<size_t>* offsets_for_adjustment); 356 357// This is a convenience function for FormatUrl() with 358// format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical 359// set of flags for "URLs to display to the user". You should be cautious about 360// using this for URLs which will be parsed or sent to other applications. 361inline string16 FormatUrl(const GURL& url, const std::string& languages) { 362 return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES, 363 NULL, NULL, NULL); 364} 365 366// Returns whether FormatUrl() would strip a trailing slash from |url|, given a 367// format flag including kFormatUrlOmitTrailingSlashOnBareHostname. 368bool CanStripTrailingSlash(const GURL& url); 369 370// Strip the portions of |url| that aren't core to the network request. 371// - user name / password 372// - reference section 373GURL SimplifyUrlForRequest(const GURL& url); 374 375void SetExplicitlyAllowedPorts(const std::string& allowed_ports); 376 377class ScopedPortException { 378 public: 379 ScopedPortException(int port); 380 ~ScopedPortException(); 381 382 private: 383 int port_; 384 385 DISALLOW_COPY_AND_ASSIGN(ScopedPortException); 386}; 387 388// Perform a simplistic test to see if IPv6 is supported by trying to create an 389// IPv6 socket. 390// TODO(jar): Make test more in-depth as needed. 391bool IPv6Supported(); 392 393// Returns true if it can determine that only loopback addresses are configured. 394// i.e. if only 127.0.0.1 and ::1 are routable. 395bool HaveOnlyLoopbackAddresses(); 396 397// IPAddressNumber is used to represent an IP address's numeric value as an 398// array of bytes, from most significant to least significant. This is the 399// network byte ordering. 400// 401// IPv4 addresses will have length 4, whereas IPv6 address will have length 16. 402typedef std::vector<unsigned char> IPAddressNumber; 403 404static const size_t kIPv4AddressSize = 4; 405static const size_t kIPv6AddressSize = 16; 406 407// Parses an IP address literal (either IPv4 or IPv6) to its numeric value. 408// Returns true on success and fills |ip_number| with the numeric value. 409bool ParseIPLiteralToNumber(const std::string& ip_literal, 410 IPAddressNumber* ip_number); 411 412// Converts an IPv4 address to an IPv4-mapped IPv6 address. 413// For example 192.168.0.1 would be converted to ::ffff:192.168.0.1. 414IPAddressNumber ConvertIPv4NumberToIPv6Number( 415 const IPAddressNumber& ipv4_number); 416 417// Parses an IP block specifier from CIDR notation to an 418// (IP address, prefix length) pair. Returns true on success and fills 419// |*ip_number| with the numeric value of the IP address and sets 420// |*prefix_length_in_bits| with the length of the prefix. 421// 422// CIDR notation literals can use either IPv4 or IPv6 literals. Some examples: 423// 424// 10.10.3.1/20 425// a:b:c::/46 426// ::1/128 427bool ParseCIDRBlock(const std::string& cidr_literal, 428 IPAddressNumber* ip_number, 429 size_t* prefix_length_in_bits); 430 431// Compares an IP address to see if it falls within the specified IP block. 432// Returns true if it does, false otherwise. 433// 434// The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any 435// IP address whose |prefix_length_in_bits| most significant bits match 436// |ip_prefix| will be matched. 437// 438// In cases when an IPv4 address is being compared to an IPv6 address prefix 439// and vice versa, the IPv4 addresses will be converted to IPv4-mapped 440// (IPv6) addresses. 441bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number, 442 const IPAddressNumber& ip_prefix, 443 size_t prefix_length_in_bits); 444 445// Makes a copy of |info|. The dynamically-allocated parts are copied as well. 446// If |recursive| is true, chained entries via ai_next are copied too. 447// The copy returned by this function should be freed using 448// FreeCopyOfAddrinfo(), and NOT freeaddrinfo(). 449struct addrinfo* CreateCopyOfAddrinfo(const struct addrinfo* info, 450 bool recursive); 451 452// Frees an addrinfo that was created by CreateCopyOfAddrinfo(). 453void FreeCopyOfAddrinfo(struct addrinfo* info); 454 455// Returns the port field of the sockaddr in |info|. 456const uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info); 457uint16* GetPortFieldFromAddrinfo(struct addrinfo* info); 458 459// Returns the value of |info's| port (in host byte ordering). 460int GetPortFromAddrinfo(const struct addrinfo* info); 461 462// Same except for struct sockaddr. 463const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address, 464 socklen_t address_len); 465int GetPortFromSockaddr(const struct sockaddr* address, 466 socklen_t address_len); 467 468// Returns true if |host| is one of the names (e.g. "localhost") or IP 469// addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback. 470// 471// Note that this function does not check for IP addresses other than 472// the above, although other IP addresses may point to the local 473// machine. 474bool IsLocalhost(const std::string& host); 475 476// struct that is used by GetNetworkList() to represent a network 477// interface. 478struct NetworkInterface { 479 NetworkInterface(); 480 NetworkInterface(const std::string& name, const IPAddressNumber& address); 481 ~NetworkInterface(); 482 483 std::string name; 484 IPAddressNumber address; 485}; 486 487typedef std::list<NetworkInterface> NetworkInterfaceList; 488 489// Returns list of network interfaces except loopback interface. If an 490// interface has more than one address, a separate entry is added to 491// the list for each address. 492// Can be called only on a thread that allows IO. 493bool GetNetworkList(NetworkInterfaceList* networks); 494 495// Private adjustment function called by std::transform which sets the offset 496// to npos if the offset occurs at or before |component_start|, otherwise don't 497// alter the offset. Exposed here for unit testing. 498struct ClampComponentOffset { 499 explicit ClampComponentOffset(size_t component_start); 500 size_t operator()(size_t offset); 501 502 const size_t component_start; 503}; 504 505} // namespace net 506 507#endif // NET_BASE_NET_UTIL_H_ 508