1// Copyright (c) 2009 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef NET_BASE_NET_UTIL_H_ 6#define NET_BASE_NET_UTIL_H_ 7 8#include "build/build_config.h" 9 10#ifdef OS_WIN 11#include <windows.h> 12#endif 13 14#include <string> 15#include <set> 16 17#include "base/basictypes.h" 18#include "base/string16.h" 19#include "net/base/escape.h" 20 21struct addrinfo; 22class FilePath; 23class GURL; 24 25namespace base { 26class Time; 27} 28 29namespace url_canon { 30struct CanonHostInfo; 31} 32 33namespace url_parse { 34struct Parsed; 35} 36 37namespace net { 38 39// Holds a list of ports that should be accepted despite bans. 40extern std::set<int> explicitly_allowed_ports; 41 42// Given the full path to a file name, creates a file: URL. The returned URL 43// may not be valid if the input is malformed. 44GURL FilePathToFileURL(const FilePath& path); 45 46// Converts a file: URL back to a filename that can be passed to the OS. The 47// file URL must be well-formed (GURL::is_valid() must return true); we don't 48// handle degenerate cases here. Returns true on success, false if it isn't a 49// valid file URL. On failure, *file_path will be empty. 50bool FileURLToFilePath(const GURL& url, FilePath* file_path); 51 52// Splits an input of the form <host>[":"<port>] into its consitituent parts. 53// Saves the result into |*host| and |*port|. If the input did not have 54// the optional port, sets |*port| to -1. 55// Returns true if the parsing was successful, false otherwise. 56// The returned host is NOT canonicalized, and may be invalid. If <host> is 57// an IPv6 literal address, the returned host includes the square brackets. 58bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 59 std::string::const_iterator host_and_port_end, 60 std::string* host, 61 int* port); 62bool ParseHostAndPort(const std::string& host_and_port, 63 std::string* host, 64 int* port); 65 66// Returns a host:port string for the given URL. 67std::string GetHostAndPort(const GURL& url); 68 69// Returns a host[:port] string for the given URL, where the port is omitted 70// if it is the default for the URL's scheme. 71std::string GetHostAndOptionalPort(const GURL& url); 72 73// Returns the string representation of an address, like "192.168.0.1". 74// Returns empty string on failure. 75std::string NetAddressToString(const struct addrinfo* net_address); 76 77// Returns the hostname of the current system. Returns empty string on failure. 78std::string GetHostName(); 79 80// Extracts the unescaped username/password from |url|, saving the results 81// into |*username| and |*password|. 82void GetIdentityFromURL(const GURL& url, 83 std::wstring* username, 84 std::wstring* password); 85 86// Return the value of the HTTP response header with name 'name'. 'headers' 87// should be in the format that URLRequest::GetResponseHeaders() returns. 88// Returns the empty string if the header is not found. 89std::wstring GetSpecificHeader(const std::wstring& headers, 90 const std::wstring& name); 91std::string GetSpecificHeader(const std::string& headers, 92 const std::string& name); 93 94// Return the value of the HTTP response header field's parameter named 95// 'param_name'. Returns the empty string if the parameter is not found or is 96// improperly formatted. 97std::wstring GetHeaderParamValue(const std::wstring& field, 98 const std::wstring& param_name); 99std::string GetHeaderParamValue(const std::string& field, 100 const std::string& param_name); 101 102// Return the filename extracted from Content-Disposition header. The following 103// formats are tried in order listed below: 104// 105// 1. RFC 2047 106// 2. Raw-8bit-characters : 107// a. UTF-8, b. referrer_charset, c. default os codepage. 108// 3. %-escaped UTF-8. 109// 110// In step 2, if referrer_charset is empty(i.e. unknown), 2b is skipped. 111// In step 3, the fallback charsets tried in step 2 are not tried. We 112// can consider doing that later. 113// 114// When a param value is ASCII, but is not in format #1 or format #3 above, 115// it is returned as it is unless it's pretty close to two supported 116// formats but not well-formed. In that case, an empty string is returned. 117// 118// In any case, a caller must check for the empty return value and resort to 119// another means to get a filename (e.g. url). 120// 121// This function does not do any escaping and callers are responsible for 122// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit. 123// 124// TODO(jungshik): revisit this issue. At the moment, the only caller 125// net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters. The 126// other caller is a unit test. Need to figure out expose this function only to 127// net_util_unittest. 128// 129std::string GetFileNameFromCD(const std::string& header, 130 const std::string& referrer_charset); 131 132// Converts the given host name to unicode characters. This can be called for 133// any host name, if the input is not IDN or is invalid in some way, we'll just 134// return the ASCII source so it is still usable. 135// 136// The input should be the canonicalized ASCII host name from GURL. This 137// function does NOT accept UTF-8! Its length must also be given (this is 138// designed to work on the substring of the host out of a URL spec). 139// 140// |languages| is a comma separated list of ISO 639 language codes. It 141// is used to determine whether a hostname is 'comprehensible' to a user 142// who understands languages listed. |host| will be converted to a 143// human-readable form (Unicode) ONLY when each component of |host| is 144// regarded as 'comprehensible'. Scipt-mixing is not allowed except that 145// Latin letters in the ASCII range can be mixed with a limited set of 146// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). 147// When |languages| is empty, even that mixing is not allowed. 148// 149// |offset_for_adjustment| is an offset into |host|, which will be adjusted to 150// point at the same logical place in the output string. If this isn't possible 151// because it points past the end of |host| or into the middle of a punycode 152// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may 153// be NULL. 154std::wstring IDNToUnicode(const char* host, 155 size_t host_len, 156 const std::wstring& languages, 157 size_t* offset_for_adjustment); 158 159// Canonicalizes |host| and returns it. Also fills |host_info| with 160// IP address information. |host_info| must not be NULL. 161std::string CanonicalizeHost(const std::string& host, 162 url_canon::CanonHostInfo* host_info); 163std::string CanonicalizeHost(const std::wstring& host, 164 url_canon::CanonHostInfo* host_info); 165 166// Returns true if |host| is not an IP address and is compliant with a set of 167// rules based on RFC 1738 and tweaked to be compatible with the real world. 168// The rules are: 169// * One or more components separated by '.' 170// * Each component begins and ends with an alphanumeric character 171// * Each component contains only alphanumeric characters and '-' or '_' 172// * The last component does not begin with a digit 173// * Optional trailing dot after last component (means "treat as FQDN") 174// 175// NOTE: You should only pass in hosts that have been returned from 176// CanonicalizeHost(), or you may not get accurate results. 177bool IsCanonicalizedHostCompliant(const std::string& host); 178 179// Call these functions to get the html snippet for a directory listing. 180// The return values of both functions are in UTF-8. 181std::string GetDirectoryListingHeader(const string16& title); 182 183// Given the name of a file in a directory (ftp or local) and 184// other information (is_dir, size, modification time), it returns 185// the html snippet to add the entry for the file to the directory listing. 186// Currently, it's a script tag containing a call to a Javascript function 187// |addRow|. 188// 189// Its 1st parameter is derived from |name| and is the Javascript-string 190// escaped form of |name| (i.e \uXXXX). The 2nd parameter is the url-escaped 191// |raw_bytes| if it's not empty. If empty, the 2nd parameter is the 192// url-escaped |name| in UTF-8. 193std::string GetDirectoryListingEntry(const string16& name, 194 const std::string& raw_bytes, 195 bool is_dir, int64 size, 196 base::Time modified); 197 198// If text starts with "www." it is removed, otherwise text is returned 199// unmodified. 200std::wstring StripWWW(const std::wstring& text); 201 202// Gets the filename from the raw Content-Disposition header (as read from the 203// network). Otherwise uses the last path component name or hostname from 204// |url|. If there is no filename or it can't be used, the given |default_name|, 205// will be used unless it is empty. 206 207// Note: it's possible for the suggested filename to be empty (e.g., 208// file:///). referrer_charset is used as one of charsets 209// to interpret a raw 8bit string in C-D header (after interpreting 210// as UTF-8 fails). See the comment for GetFilenameFromCD for more details. 211FilePath GetSuggestedFilename(const GURL& url, 212 const std::string& content_disposition, 213 const std::string& referrer_charset, 214 const FilePath& default_name); 215 216// Checks the given port against a list of ports which are restricted by 217// default. Returns true if the port is allowed, false if it is restricted. 218bool IsPortAllowedByDefault(int port); 219 220// Checks the given port against a list of ports which are restricted by the 221// FTP protocol. Returns true if the port is allowed, false if it is 222// restricted. 223bool IsPortAllowedByFtp(int port); 224 225// Check if banned |port| has been overriden by an entry in 226// |explicitly_allowed_ports_|. 227bool IsPortAllowedByOverride(int port); 228 229// Set socket to non-blocking mode 230int SetNonBlocking(int fd); 231 232// Appends the given part of the original URL to the output string formatted for 233// the user. The given parsed structure will be updated. The host name formatter 234// also takes the same accept languages component as ElideURL. |new_parsed| may 235// be null. 236void AppendFormattedHost(const GURL& url, 237 const std::wstring& languages, 238 std::wstring* output, 239 url_parse::Parsed* new_parsed, 240 size_t* offset_for_adjustment); 241 242// Creates a string representation of |url|. The IDN host name may be in Unicode 243// if |languages| accepts the Unicode representation. If 244// |omit_username_password| is true, any username and password are removed. 245// |unescape_rules| defines how to clean the URL for human readability. 246// You will generally want |UnescapeRule::SPACES| for display to the user if you 247// can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the 248// query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and 249// UTF-8. 250// 251// The last three parameters may be NULL. 252// |new_parsed| will be set to the parsing parameters of the resultant URL. 253// |prefix_end| will be the length before the hostname of the resultant URL. 254// |offset_for_adjustment| is an offset into the original |url|'s spec(), which 255// will be modified to reflect changes this function makes to the output string; 256// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is 257// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return 258// the output string will be "http://c.com/" and |offset_for_adjustment| will be 259// 8. If the offset cannot be successfully adjusted (e.g. because it points 260// into the middle of a component that was entirely removed, past the end of the 261// string, or into the middle of an encoding sequence), it will be set to 262// std::wstring::npos. 263std::wstring FormatUrl(const GURL& url, 264 const std::wstring& languages, 265 bool omit_username_password, 266 UnescapeRule::Type unescape_rules, 267 url_parse::Parsed* new_parsed, 268 size_t* prefix_end, 269 size_t* offset_for_adjustment); 270 271// Creates a string representation of |url| for display to the user. 272// This is a shorthand of the above function with omit_username_password=true, 273// unescape=SPACES, new_parsed=NULL, and prefix_end=NULL. 274inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { 275 return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL, 276 NULL); 277} 278 279// Strip the portions of |url| that aren't core to the network request. 280// - user name / password 281// - reference section 282GURL SimplifyUrlForRequest(const GURL& url); 283 284void SetExplicitlyAllowedPorts(const std::wstring& allowed_ports); 285 286} // namespace net 287 288#endif // NET_BASE_NET_UTIL_H_ 289