net_util.cc revision b6cb656ba00bb19ac6723fcd2ad9e3c22fffa9f2
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "net/base/net_util.h" 6 7#include <unicode/regex.h> 8#include <unicode/ucnv.h> 9#include <unicode/uidna.h> 10#include <unicode/ulocdata.h> 11#include <unicode/uniset.h> 12#include <unicode/uscript.h> 13#include <unicode/uset.h> 14#include <algorithm> 15#include <map> 16 17#include "build/build_config.h" 18 19#if defined(OS_WIN) 20#include <windows.h> 21#include <winsock2.h> 22#include <wspiapi.h> // Needed for Win2k compat. 23#elif defined(OS_POSIX) 24#include <fcntl.h> 25#ifndef ANDROID 26#include <ifaddrs.h> 27#endif 28#include <netdb.h> 29#include <net/if.h> 30#include <netinet/in.h> 31#if defined(__BIONIC__) && defined(ANDROID) 32#include <netinet/in6.h> 33#endif 34#endif 35 36#include "base/base64.h" 37#include "base/basictypes.h" 38#include "base/file_path.h" 39#include "base/file_util.h" 40#include "base/i18n/file_util_icu.h" 41#include "base/i18n/icu_string_conversions.h" 42#include "base/i18n/time_formatting.h" 43#include "base/json/string_escape.h" 44#include "base/logging.h" 45#include "base/message_loop.h" 46#include "base/metrics/histogram.h" 47#include "base/path_service.h" 48#include "base/singleton.h" 49#include "base/stl_util-inl.h" 50#include "base/string_number_conversions.h" 51#include "base/string_piece.h" 52#include "base/string_split.h" 53#include "base/string_tokenizer.h" 54#include "base/string_util.h" 55#include "base/stringprintf.h" 56#include "base/synchronization/lock.h" 57#include "base/sys_string_conversions.h" 58#include "base/time.h" 59#include "base/utf_offset_string_conversions.h" 60#include "base/utf_string_conversions.h" 61#include "googleurl/src/gurl.h" 62#include "googleurl/src/url_canon.h" 63#include "googleurl/src/url_canon_ip.h" 64#include "googleurl/src/url_parse.h" 65#include "grit/net_resources.h" 66#include "net/base/dns_util.h" 67#include "net/base/escape.h" 68#include "net/base/net_module.h" 69#if defined(OS_WIN) 70#include "net/base/winsock_init.h" 71#endif 72#include "unicode/datefmt.h" 73 74 75using base::Time; 76 77namespace net { 78 79namespace { 80 81// what we prepend to get a file URL 82static const FilePath::CharType kFileURLPrefix[] = 83 FILE_PATH_LITERAL("file:///"); 84 85// The general list of blocked ports. Will be blocked unless a specific 86// protocol overrides it. (Ex: ftp can use ports 20 and 21) 87static const int kRestrictedPorts[] = { 88 1, // tcpmux 89 7, // echo 90 9, // discard 91 11, // systat 92 13, // daytime 93 15, // netstat 94 17, // qotd 95 19, // chargen 96 20, // ftp data 97 21, // ftp access 98 22, // ssh 99 23, // telnet 100 25, // smtp 101 37, // time 102 42, // name 103 43, // nicname 104 53, // domain 105 77, // priv-rjs 106 79, // finger 107 87, // ttylink 108 95, // supdup 109 101, // hostriame 110 102, // iso-tsap 111 103, // gppitnp 112 104, // acr-nema 113 109, // pop2 114 110, // pop3 115 111, // sunrpc 116 113, // auth 117 115, // sftp 118 117, // uucp-path 119 119, // nntp 120 123, // NTP 121 135, // loc-srv /epmap 122 139, // netbios 123 143, // imap2 124 179, // BGP 125 389, // ldap 126 465, // smtp+ssl 127 512, // print / exec 128 513, // login 129 514, // shell 130 515, // printer 131 526, // tempo 132 530, // courier 133 531, // chat 134 532, // netnews 135 540, // uucp 136 556, // remotefs 137 563, // nntp+ssl 138 587, // stmp? 139 601, // ?? 140 636, // ldap+ssl 141 993, // ldap+ssl 142 995, // pop3+ssl 143 2049, // nfs 144 3659, // apple-sasl / PasswordServer 145 4045, // lockd 146 6000, // X11 147 6665, // Alternate IRC [Apple addition] 148 6666, // Alternate IRC [Apple addition] 149 6667, // Standard IRC [Apple addition] 150 6668, // Alternate IRC [Apple addition] 151 6669, // Alternate IRC [Apple addition] 152 0xFFFF, // Used to block all invalid port numbers (see 153 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) 154}; 155 156// FTP overrides the following restricted ports. 157static const int kAllowedFtpPorts[] = { 158 21, // ftp data 159 22, // ssh 160}; 161 162template<typename STR> 163STR GetSpecificHeaderT(const STR& headers, const STR& name) { 164 // We want to grab the Value from the "Key: Value" pairs in the headers, 165 // which should look like this (no leading spaces, \n-separated) (we format 166 // them this way in url_request_inet.cc): 167 // HTTP/1.1 200 OK\n 168 // ETag: "6d0b8-947-24f35ec0"\n 169 // Content-Length: 2375\n 170 // Content-Type: text/html; charset=UTF-8\n 171 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n 172 if (headers.empty()) 173 return STR(); 174 175 STR match; 176 match.push_back('\n'); 177 match.append(name); 178 match.push_back(':'); 179 180 typename STR::const_iterator begin = 181 search(headers.begin(), headers.end(), match.begin(), match.end(), 182 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); 183 184 if (begin == headers.end()) 185 return STR(); 186 187 begin += match.length(); 188 189 typename STR::const_iterator end = find(begin, headers.end(), '\n'); 190 191 STR ret; 192 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); 193 return ret; 194} 195 196// Similar to Base64Decode. Decodes a Q-encoded string to a sequence 197// of bytes. If input is invalid, return false. 198bool QPDecode(const std::string& input, std::string* output) { 199 std::string temp; 200 temp.reserve(input.size()); 201 std::string::const_iterator it = input.begin(); 202 while (it != input.end()) { 203 if (*it == '_') { 204 temp.push_back(' '); 205 } else if (*it == '=') { 206 if (input.end() - it < 3) { 207 return false; 208 } 209 if (IsHexDigit(static_cast<unsigned char>(*(it + 1))) && 210 IsHexDigit(static_cast<unsigned char>(*(it + 2)))) { 211 unsigned char ch = HexDigitToInt(*(it + 1)) * 16 + 212 HexDigitToInt(*(it + 2)); 213 temp.push_back(static_cast<char>(ch)); 214 ++it; 215 ++it; 216 } else { 217 return false; 218 } 219 } else if (0x20 < *it && *it < 0x7F) { 220 // In a Q-encoded word, only printable ASCII characters 221 // represent themselves. Besides, space, '=', '_' and '?' are 222 // not allowed, but they're already filtered out. 223 DCHECK(*it != 0x3D && *it != 0x5F && *it != 0x3F); 224 temp.push_back(*it); 225 } else { 226 return false; 227 } 228 ++it; 229 } 230 output->swap(temp); 231 return true; 232} 233 234enum RFC2047EncodingType {Q_ENCODING, B_ENCODING}; 235bool DecodeBQEncoding(const std::string& part, RFC2047EncodingType enc_type, 236 const std::string& charset, std::string* output) { 237 std::string decoded; 238 if (enc_type == B_ENCODING) { 239 if (!base::Base64Decode(part, &decoded)) { 240 return false; 241 } 242 } else { 243 if (!QPDecode(part, &decoded)) { 244 return false; 245 } 246 } 247 248 UErrorCode err = U_ZERO_ERROR; 249 UConverter* converter(ucnv_open(charset.c_str(), &err)); 250 if (U_FAILURE(err)) { 251 return false; 252 } 253 254 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. 255 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes 256 // in UTF-8. Therefore, the expansion ratio is 3 at most. 257 int length = static_cast<int>(decoded.length()); 258 char* buf = WriteInto(output, length * 3); 259 length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, length * 3, 260 decoded.data(), length, &err); 261 ucnv_close(converter); 262 if (U_FAILURE(err)) { 263 return false; 264 } 265 output->resize(length); 266 return true; 267} 268 269bool DecodeWord(const std::string& encoded_word, 270 const std::string& referrer_charset, 271 bool* is_rfc2047, 272 std::string* output) { 273 *is_rfc2047 = false; 274 output->clear(); 275 if (encoded_word.empty()) 276 return true; 277 278 if (!IsStringASCII(encoded_word)) { 279 // Try UTF-8, referrer_charset and the native OS default charset in turn. 280 if (IsStringUTF8(encoded_word)) { 281 *output = encoded_word; 282 } else { 283 std::wstring wide_output; 284 if (!referrer_charset.empty() && 285 base::CodepageToWide(encoded_word, referrer_charset.c_str(), 286 base::OnStringConversionError::FAIL, 287 &wide_output)) { 288 *output = WideToUTF8(wide_output); 289 } else { 290 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); 291 } 292 } 293 294 return true; 295 } 296 297 // RFC 2047 : one of encoding methods supported by Firefox and relatively 298 // widely used by web servers. 299 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. 300 // We don't care about the length restriction (72 bytes) because 301 // many web servers generate encoded words longer than the limit. 302 std::string tmp; 303 *is_rfc2047 = true; 304 int part_index = 0; 305 std::string charset; 306 StringTokenizer t(encoded_word, "?"); 307 RFC2047EncodingType enc_type = Q_ENCODING; 308 while (*is_rfc2047 && t.GetNext()) { 309 std::string part = t.token(); 310 switch (part_index) { 311 case 0: 312 if (part != "=") { 313 *is_rfc2047 = false; 314 break; 315 } 316 ++part_index; 317 break; 318 case 1: 319 // Do we need charset validity check here? 320 charset = part; 321 ++part_index; 322 break; 323 case 2: 324 if (part.size() > 1 || 325 part.find_first_of("bBqQ") == std::string::npos) { 326 *is_rfc2047 = false; 327 break; 328 } 329 if (part[0] == 'b' || part[0] == 'B') { 330 enc_type = B_ENCODING; 331 } 332 ++part_index; 333 break; 334 case 3: 335 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); 336 if (!*is_rfc2047) { 337 // Last minute failure. Invalid B/Q encoding. Rather than 338 // passing it through, return now. 339 return false; 340 } 341 ++part_index; 342 break; 343 case 4: 344 if (part != "=") { 345 // Another last minute failure ! 346 // Likely to be a case of two encoded-words in a row or 347 // an encoded word followed by a non-encoded word. We can be 348 // generous, but it does not help much in terms of compatibility, 349 // I believe. Return immediately. 350 *is_rfc2047 = false; 351 return false; 352 } 353 ++part_index; 354 break; 355 default: 356 *is_rfc2047 = false; 357 return false; 358 } 359 } 360 361 if (*is_rfc2047) { 362 if (*(encoded_word.end() - 1) == '=') { 363 output->swap(tmp); 364 return true; 365 } 366 // encoded_word ending prematurelly with '?' or extra '?' 367 *is_rfc2047 = false; 368 return false; 369 } 370 371 // We're not handling 'especial' characters quoted with '\', but 372 // it should be Ok because we're not an email client but a 373 // web browser. 374 375 // What IE6/7 does: %-escaped UTF-8. 376 tmp = UnescapeURLComponent(encoded_word, UnescapeRule::SPACES); 377 if (IsStringUTF8(tmp)) { 378 output->swap(tmp); 379 return true; 380 // We can try either the OS default charset or 'origin charset' here, 381 // As far as I can tell, IE does not support it. However, I've seen 382 // web servers emit %-escaped string in a legacy encoding (usually 383 // origin charset). 384 // TODO(jungshik) : Test IE further and consider adding a fallback here. 385 } 386 return false; 387} 388 389bool DecodeParamValue(const std::string& input, 390 const std::string& referrer_charset, 391 std::string* output) { 392 std::string tmp; 393 // Tokenize with whitespace characters. 394 StringTokenizer t(input, " \t\n\r"); 395 t.set_options(StringTokenizer::RETURN_DELIMS); 396 bool is_previous_token_rfc2047 = true; 397 while (t.GetNext()) { 398 if (t.token_is_delim()) { 399 // If the previous non-delimeter token is not RFC2047-encoded, 400 // put in a space in its place. Otheriwse, skip over it. 401 if (!is_previous_token_rfc2047) { 402 tmp.push_back(' '); 403 } 404 continue; 405 } 406 // We don't support a single multibyte character split into 407 // adjacent encoded words. Some broken mail clients emit headers 408 // with that problem, but most web servers usually encode a filename 409 // in a single encoded-word. Firefox/Thunderbird do not support 410 // it, either. 411 std::string decoded; 412 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, 413 &decoded)) 414 return false; 415 tmp.append(decoded); 416 } 417 output->swap(tmp); 418 return true; 419} 420 421// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm 422// sure this doesn't properly handle all (most?) cases. 423template<typename STR> 424STR GetHeaderParamValueT(const STR& header, const STR& param_name, 425 QuoteRule::Type quote_rule) { 426 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". 427 typename STR::const_iterator param_begin = 428 search(header.begin(), header.end(), param_name.begin(), param_name.end(), 429 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); 430 431 if (param_begin == header.end()) 432 return STR(); 433 param_begin += param_name.length(); 434 435 STR whitespace; 436 whitespace.push_back(' '); 437 whitespace.push_back('\t'); 438 const typename STR::size_type equals_offset = 439 header.find_first_not_of(whitespace, param_begin - header.begin()); 440 if (equals_offset == STR::npos || header.at(equals_offset) != '=') 441 return STR(); 442 443 param_begin = header.begin() + equals_offset + 1; 444 if (param_begin == header.end()) 445 return STR(); 446 447 typename STR::const_iterator param_end; 448 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { 449 param_end = find(param_begin+1, header.end(), '"'); 450 if (param_end == header.end()) 451 return STR(); // poorly formatted param? 452 453 ++param_begin; // skip past the quote. 454 } else { 455 param_end = find(param_begin+1, header.end(), ';'); 456 } 457 458 return STR(param_begin, param_end); 459} 460 461// Does some simple normalization of scripts so we can allow certain scripts 462// to exist together. 463// TODO(brettw) bug 880223: we should allow some other languages to be 464// oombined such as Chinese and Latin. We will probably need a more 465// complicated system of language pairs to have more fine-grained control. 466UScriptCode NormalizeScript(UScriptCode code) { 467 switch (code) { 468 case USCRIPT_KATAKANA: 469 case USCRIPT_HIRAGANA: 470 case USCRIPT_KATAKANA_OR_HIRAGANA: 471 case USCRIPT_HANGUL: // This one is arguable. 472 return USCRIPT_HAN; 473 default: 474 return code; 475 } 476} 477 478bool IsIDNComponentInSingleScript(const char16* str, int str_len) { 479 UScriptCode first_script = USCRIPT_INVALID_CODE; 480 bool is_first = true; 481 482 int i = 0; 483 while (i < str_len) { 484 unsigned code_point; 485 U16_NEXT(str, i, str_len, code_point); 486 487 UErrorCode err = U_ZERO_ERROR; 488 UScriptCode cur_script = uscript_getScript(code_point, &err); 489 if (err != U_ZERO_ERROR) 490 return false; // Report mixed on error. 491 cur_script = NormalizeScript(cur_script); 492 493 // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. 494 if (is_first && cur_script != USCRIPT_COMMON) { 495 first_script = cur_script; 496 is_first = false; 497 } else { 498 if (cur_script != USCRIPT_COMMON && cur_script != first_script) 499 return false; 500 } 501 } 502 return true; 503} 504 505// Check if the script of a language can be 'safely' mixed with 506// Latin letters in the ASCII range. 507bool IsCompatibleWithASCIILetters(const std::string& lang) { 508 // For now, just list Chinese, Japanese and Korean (positive list). 509 // An alternative is negative-listing (languages using Greek and 510 // Cyrillic letters), but it can be more dangerous. 511 return !lang.substr(0, 2).compare("zh") || 512 !lang.substr(0, 2).compare("ja") || 513 !lang.substr(0, 2).compare("ko"); 514} 515 516typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; 517 518class LangToExemplarSet { 519 public: 520 static LangToExemplarSet* GetInstance() { 521 return Singleton<LangToExemplarSet>::get(); 522 } 523 524 private: 525 LangToExemplarSetMap map; 526 LangToExemplarSet() { } 527 ~LangToExemplarSet() { 528 STLDeleteContainerPairSecondPointers(map.begin(), map.end()); 529 } 530 531 friend class Singleton<LangToExemplarSet>; 532 friend struct DefaultSingletonTraits<LangToExemplarSet>; 533 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); 534 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); 535 536 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); 537}; 538 539bool GetExemplarSetForLang(const std::string& lang, 540 icu::UnicodeSet** lang_set) { 541 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; 542 LangToExemplarSetMap::const_iterator pos = map.find(lang); 543 if (pos != map.end()) { 544 *lang_set = pos->second; 545 return true; 546 } 547 return false; 548} 549 550void SetExemplarSetForLang(const std::string& lang, 551 icu::UnicodeSet* lang_set) { 552 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; 553 map.insert(std::make_pair(lang, lang_set)); 554} 555 556static base::Lock lang_set_lock; 557 558// Returns true if all the characters in component_characters are used by 559// the language |lang|. 560bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, 561 const std::string& lang) { 562 static const icu::UnicodeSet kASCIILetters(0x61, 0x7a); // [a-z] 563 icu::UnicodeSet* lang_set; 564 // We're called from both the UI thread and the history thread. 565 { 566 base::AutoLock lock(lang_set_lock); 567 if (!GetExemplarSetForLang(lang, &lang_set)) { 568 UErrorCode status = U_ZERO_ERROR; 569 ULocaleData* uld = ulocdata_open(lang.c_str(), &status); 570 // TODO(jungshik) Turn this check on when the ICU data file is 571 // rebuilt with the minimal subset of locale data for languages 572 // to which Chrome is not localized but which we offer in the list 573 // of languages selectable for Accept-Languages. With the rebuilt ICU 574 // data, ulocdata_open never should fall back to the default locale. 575 // (issue 2078) 576 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); 577 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { 578 lang_set = reinterpret_cast<icu::UnicodeSet *>( 579 ulocdata_getExemplarSet(uld, NULL, 0, 580 ULOCDATA_ES_STANDARD, &status)); 581 // If |lang| is compatible with ASCII Latin letters, add them. 582 if (IsCompatibleWithASCIILetters(lang)) 583 lang_set->addAll(kASCIILetters); 584 } else { 585 lang_set = new icu::UnicodeSet(1, 0); 586 } 587 lang_set->freeze(); 588 SetExemplarSetForLang(lang, lang_set); 589 ulocdata_close(uld); 590 } 591 } 592 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); 593} 594 595// Returns true if the given Unicode host component is safe to display to the 596// user. 597bool IsIDNComponentSafe(const char16* str, 598 int str_len, 599 const std::wstring& languages) { 600 // Most common cases (non-IDN) do not reach here so that we don't 601 // need a fast return path. 602 // TODO(jungshik) : Check if there's any character inappropriate 603 // (although allowed) for domain names. 604 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and 605 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt 606 // For now, we borrow the list from Mozilla and tweaked it slightly. 607 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because 608 // they're gonna be canonicalized to U+0020 and full stop before 609 // reaching here.) 610 // The original list is available at 611 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and 612 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 613 614 UErrorCode status = U_ZERO_ERROR; 615#ifdef U_WCHAR_IS_UTF16 616 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 617 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 618 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 619 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 620 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 621 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 622 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 623 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 624 L"[\ufffa-\ufffd]]"), status); 625 DCHECK(U_SUCCESS(status)); 626 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 627 // Lone katakana no, so, or n 628 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" 629 // Repeating Japanese accent characters 630 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), 631 0, status); 632#else 633 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 634 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 635 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 636 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 637 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 638 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 639 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 640 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 641 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 642 DCHECK(U_SUCCESS(status)); 643 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 644 // Lone katakana no, so, or n 645 "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]" 646 // Repeating Japanese accent characters 647 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), 648 0, status); 649#endif 650 DCHECK(U_SUCCESS(status)); 651 icu::UnicodeSet component_characters; 652 icu::UnicodeString component_string(str, str_len); 653 component_characters.addAll(component_string); 654 if (dangerous_characters.containsSome(component_characters)) 655 return false; 656 657 DCHECK(U_SUCCESS(status)); 658 dangerous_patterns.reset(component_string); 659 if (dangerous_patterns.find()) 660 return false; 661 662 // If the language list is empty, the result is completely determined 663 // by whether a component is a single script or not. This will block 664 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are 665 // allowed with |languages| (while it blocks Chinese + Latin letters with 666 // an accent as should be the case), but we want to err on the safe side 667 // when |languages| is empty. 668 if (languages.empty()) 669 return IsIDNComponentInSingleScript(str, str_len); 670 671 // |common_characters| is made up of ASCII numbers, hyphen, plus and 672 // underscore that are used across scripts and allowed in domain names. 673 // (sync'd with characters allowed in url_canon_host with square 674 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. 675 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), 676 status); 677 DCHECK(U_SUCCESS(status)); 678 // Subtract common characters because they're always allowed so that 679 // we just have to check if a language-specific set contains 680 // the remainder. 681 component_characters.removeAll(common_characters); 682 683 std::string languages_list(WideToASCII(languages)); 684 StringTokenizer t(languages_list, ","); 685 while (t.GetNext()) { 686 if (IsComponentCoveredByLang(component_characters, t.token())) 687 return true; 688 } 689 return false; 690} 691 692// Converts one component of a host (between dots) to IDN if safe. The result 693// will be APPENDED to the given output string and will be the same as the input 694// if it is not IDN or the IDN is unsafe to display. Returns whether any 695// conversion was performed. 696bool IDNToUnicodeOneComponent(const char16* comp, 697 size_t comp_len, 698 const std::wstring& languages, 699 string16* out) { 700 DCHECK(out); 701 if (comp_len == 0) 702 return false; 703 704 // Only transform if the input can be an IDN component. 705 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 706 if ((comp_len > arraysize(kIdnPrefix)) && 707 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { 708 // Repeatedly expand the output string until it's big enough. It looks like 709 // ICU will return the required size of the buffer, but that's not 710 // documented, so we'll just grow by 2x. This should be rare and is not on a 711 // critical path. 712 size_t original_length = out->length(); 713 for (int extra_space = 64; ; extra_space *= 2) { 714 UErrorCode status = U_ZERO_ERROR; 715 out->resize(out->length() + extra_space); 716 int output_chars = uidna_IDNToUnicode(comp, 717 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, 718 UIDNA_DEFAULT, NULL, &status); 719 if (status == U_ZERO_ERROR) { 720 // Converted successfully. 721 out->resize(original_length + output_chars); 722 if (IsIDNComponentSafe(out->data() + original_length, output_chars, 723 languages)) 724 return true; 725 } 726 727 if (status != U_BUFFER_OVERFLOW_ERROR) 728 break; 729 } 730 // Failed, revert back to original string. 731 out->resize(original_length); 732 } 733 734 // We get here with no IDN or on error, in which case we just append the 735 // literal input. 736 out->append(comp, comp_len); 737 return false; 738} 739 740// If |component| is valid, its begin is incremented by |delta|. 741void AdjustComponent(int delta, url_parse::Component* component) { 742 if (!component->is_valid()) 743 return; 744 745 DCHECK(delta >= 0 || component->begin >= -delta); 746 component->begin += delta; 747} 748 749// Adjusts all the components of |parsed| by |delta|, except for the scheme. 750void AdjustComponents(int delta, url_parse::Parsed* parsed) { 751 AdjustComponent(delta, &(parsed->username)); 752 AdjustComponent(delta, &(parsed->password)); 753 AdjustComponent(delta, &(parsed->host)); 754 AdjustComponent(delta, &(parsed->port)); 755 AdjustComponent(delta, &(parsed->path)); 756 AdjustComponent(delta, &(parsed->query)); 757 AdjustComponent(delta, &(parsed->ref)); 758} 759 760std::wstring FormatUrlInternal(const GURL& url, 761 const std::wstring& languages, 762 FormatUrlTypes format_types, 763 UnescapeRule::Type unescape_rules, 764 url_parse::Parsed* new_parsed, 765 size_t* prefix_end, 766 size_t* offset_for_adjustment); 767 768// Helper for FormatUrl()/FormatUrlInternal(). 769std::wstring FormatViewSourceUrl(const GURL& url, 770 const std::wstring& languages, 771 net::FormatUrlTypes format_types, 772 UnescapeRule::Type unescape_rules, 773 url_parse::Parsed* new_parsed, 774 size_t* prefix_end, 775 size_t* offset_for_adjustment) { 776 DCHECK(new_parsed); 777 const wchar_t* const kWideViewSource = L"view-source:"; 778 const size_t kViewSourceLengthPlus1 = 12; 779 780 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); 781 size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ? 782 std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1); 783 size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ? 784 NULL : &temp_offset; 785 std::wstring result = FormatUrlInternal(real_url, languages, format_types, 786 unescape_rules, new_parsed, prefix_end, temp_offset_ptr); 787 result.insert(0, kWideViewSource); 788 789 // Adjust position values. 790 if (new_parsed->scheme.is_nonempty()) { 791 // Assume "view-source:real-scheme" as a scheme. 792 new_parsed->scheme.len += kViewSourceLengthPlus1; 793 } else { 794 new_parsed->scheme.begin = 0; 795 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; 796 } 797 AdjustComponents(kViewSourceLengthPlus1, new_parsed); 798 if (prefix_end) 799 *prefix_end += kViewSourceLengthPlus1; 800 if (temp_offset_ptr) { 801 *offset_for_adjustment = (temp_offset == std::wstring::npos) ? 802 std::wstring::npos : (temp_offset + kViewSourceLengthPlus1); 803 } 804 return result; 805} 806 807// Appends the substring |in_component| inside of the URL |spec| to |output|, 808// and the resulting range will be filled into |out_component|. |unescape_rules| 809// defines how to clean the URL for human readability. |offset_for_adjustment| 810// is an offset into |output| which will be adjusted based on how it maps to the 811// component being converted; if it is less than output->length(), it will be 812// untouched, and if it is greater than output->length() + in_component.len it 813// will be shortened by the difference in lengths between the input and output 814// components. Otherwise it points into the component being converted, and is 815// adjusted to point to the same logical place in |output|. 816// |offset_for_adjustment| may not be NULL. 817void AppendFormattedComponent(const std::string& spec, 818 const url_parse::Component& in_component, 819 UnescapeRule::Type unescape_rules, 820 std::wstring* output, 821 url_parse::Component* out_component, 822 size_t* offset_for_adjustment) { 823 DCHECK(output); 824 DCHECK(offset_for_adjustment); 825 if (in_component.is_nonempty()) { 826 out_component->begin = static_cast<int>(output->length()); 827 size_t offset_past_current_output = 828 ((*offset_for_adjustment == std::wstring::npos) || 829 (*offset_for_adjustment < output->length())) ? 830 std::wstring::npos : (*offset_for_adjustment - output->length()); 831 size_t* offset_into_component = 832 (offset_past_current_output >= static_cast<size_t>(in_component.len)) ? 833 NULL : &offset_past_current_output; 834 if (unescape_rules == UnescapeRule::NONE) { 835 output->append(UTF8ToWideAndAdjustOffset( 836 spec.substr(in_component.begin, in_component.len), 837 offset_into_component)); 838 } else { 839 output->append(UTF16ToWideHack(UnescapeAndDecodeUTF8URLComponent( 840 spec.substr(in_component.begin, in_component.len), unescape_rules, 841 offset_into_component))); 842 } 843 out_component->len = 844 static_cast<int>(output->length()) - out_component->begin; 845 if (offset_into_component) { 846 *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ? 847 std::wstring::npos : (out_component->begin + *offset_into_component); 848 } else if (offset_past_current_output != std::wstring::npos) { 849 *offset_for_adjustment += out_component->len - in_component.len; 850 } 851 } else { 852 out_component->reset(); 853 } 854} 855 856// TODO(viettrungluu): This is really the old-fashioned version, made internal. 857// I need to really convert |FormatUrl()|. 858std::wstring FormatUrlInternal(const GURL& url, 859 const std::wstring& languages, 860 FormatUrlTypes format_types, 861 UnescapeRule::Type unescape_rules, 862 url_parse::Parsed* new_parsed, 863 size_t* prefix_end, 864 size_t* offset_for_adjustment) { 865 url_parse::Parsed parsed_temp; 866 if (!new_parsed) 867 new_parsed = &parsed_temp; 868 else 869 *new_parsed = url_parse::Parsed(); 870 size_t offset_temp = std::wstring::npos; 871 if (!offset_for_adjustment) 872 offset_for_adjustment = &offset_temp; 873 874 std::wstring url_string; 875 876 // Check for empty URLs or 0 available text width. 877 if (url.is_empty()) { 878 if (prefix_end) 879 *prefix_end = 0; 880 *offset_for_adjustment = std::wstring::npos; 881 return url_string; 882 } 883 884 // Special handling for view-source:. Don't use chrome::kViewSourceScheme 885 // because this library shouldn't depend on chrome. 886 const char* const kViewSource = "view-source"; 887 // Reject "view-source:view-source:..." to avoid deep recursion. 888 const char* const kViewSourceTwice = "view-source:view-source:"; 889 if (url.SchemeIs(kViewSource) && 890 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { 891 return FormatViewSourceUrl(url, languages, format_types, 892 unescape_rules, new_parsed, prefix_end, offset_for_adjustment); 893 } 894 895 // We handle both valid and invalid URLs (this will give us the spec 896 // regardless of validity). 897 const std::string& spec = url.possibly_invalid_spec(); 898 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 899 if (*offset_for_adjustment >= spec.length()) 900 *offset_for_adjustment = std::wstring::npos; 901 902 // Copy everything before the username (the scheme and the separators.) 903 // These are ASCII. 904 url_string.insert(url_string.end(), spec.begin(), 905 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, 906 true)); 907 908 const wchar_t kHTTP[] = L"http://"; 909 const char kFTP[] = "ftp."; 910 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This 911 // means that if we trim "http://" off a URL whose host starts with "ftp." and 912 // the user inputs this into any field subject to fixup (which is basically 913 // all input fields), the meaning would be changed. (In fact, often the 914 // formatted URL is directly pre-filled into an input field.) For this reason 915 // we avoid stripping "http://" in this case. 916 bool omit_http = 917 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && 918 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); 919 920 new_parsed->scheme = parsed.scheme; 921 922 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { 923 // Remove the username and password fields. We don't want to display those 924 // to the user since they can be used for attacks, 925 // e.g. "http://google.com:search@evil.ru/" 926 new_parsed->username.reset(); 927 new_parsed->password.reset(); 928 if ((*offset_for_adjustment != std::wstring::npos) && 929 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { 930 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { 931 // The seeming off-by-one and off-by-two in these first two lines are to 932 // account for the ':' after the username and '@' after the password. 933 if (*offset_for_adjustment > 934 static_cast<size_t>(parsed.password.end())) { 935 *offset_for_adjustment -= 936 (parsed.username.len + parsed.password.len + 2); 937 } else if (*offset_for_adjustment > 938 static_cast<size_t>(parsed.username.begin)) { 939 *offset_for_adjustment = std::wstring::npos; 940 } 941 } else { 942 const url_parse::Component* nonempty_component = 943 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; 944 // The seeming off-by-one in these first two lines is to account for the 945 // '@' after the username/password. 946 if (*offset_for_adjustment > 947 static_cast<size_t>(nonempty_component->end())) { 948 *offset_for_adjustment -= (nonempty_component->len + 1); 949 } else if (*offset_for_adjustment > 950 static_cast<size_t>(nonempty_component->begin)) { 951 *offset_for_adjustment = std::wstring::npos; 952 } 953 } 954 } 955 } else { 956 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, 957 &new_parsed->username, offset_for_adjustment); 958 if (parsed.password.is_valid()) 959 url_string.push_back(':'); 960 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, 961 &new_parsed->password, offset_for_adjustment); 962 if (parsed.username.is_valid() || parsed.password.is_valid()) 963 url_string.push_back('@'); 964 } 965 if (prefix_end) 966 *prefix_end = static_cast<size_t>(url_string.length()); 967 968 AppendFormattedHost(url, languages, &url_string, new_parsed, 969 offset_for_adjustment); 970 971 // Port. 972 if (parsed.port.is_nonempty()) { 973 url_string.push_back(':'); 974 new_parsed->port.begin = url_string.length(); 975 url_string.insert(url_string.end(), 976 spec.begin() + parsed.port.begin, 977 spec.begin() + parsed.port.end()); 978 new_parsed->port.len = url_string.length() - new_parsed->port.begin; 979 } else { 980 new_parsed->port.reset(); 981 } 982 983 // Path and query both get the same general unescape & convert treatment. 984 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || 985 !CanStripTrailingSlash(url)) { 986 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, 987 &new_parsed->path, offset_for_adjustment); 988 } 989 if (parsed.query.is_valid()) 990 url_string.push_back('?'); 991 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, 992 &new_parsed->query, offset_for_adjustment); 993 994 // Reference is stored in valid, unescaped UTF-8, so we can just convert. 995 if (parsed.ref.is_valid()) { 996 url_string.push_back('#'); 997 new_parsed->ref.begin = url_string.length(); 998 size_t offset_past_current_output = 999 ((*offset_for_adjustment == std::wstring::npos) || 1000 (*offset_for_adjustment < url_string.length())) ? 1001 std::wstring::npos : (*offset_for_adjustment - url_string.length()); 1002 size_t* offset_into_ref = 1003 (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ? 1004 NULL : &offset_past_current_output; 1005 if (parsed.ref.len > 0) { 1006 url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin, 1007 parsed.ref.len), 1008 offset_into_ref)); 1009 } 1010 new_parsed->ref.len = url_string.length() - new_parsed->ref.begin; 1011 if (offset_into_ref) { 1012 *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ? 1013 std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref); 1014 } else if (offset_past_current_output != std::wstring::npos) { 1015 // We clamped the offset near the beginning of this function to ensure it 1016 // was within the input URL. If we reach here, the input was something 1017 // invalid and non-parseable such that the offset was past any component 1018 // we could figure out. In this case it won't be represented in the 1019 // output string, so reset it. 1020 *offset_for_adjustment = std::wstring::npos; 1021 } 1022 } 1023 1024 // If we need to strip out http do it after the fact. This way we don't need 1025 // to worry about how offset_for_adjustment is interpreted. 1026 const size_t kHTTPSize = arraysize(kHTTP) - 1; 1027 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { 1028 url_string = url_string.substr(kHTTPSize); 1029 if (*offset_for_adjustment != std::wstring::npos) { 1030 if (*offset_for_adjustment < kHTTPSize) 1031 *offset_for_adjustment = std::wstring::npos; 1032 else 1033 *offset_for_adjustment -= kHTTPSize; 1034 } 1035 if (prefix_end) 1036 *prefix_end -= kHTTPSize; 1037 1038 // Adjust new_parsed. 1039 DCHECK(new_parsed->scheme.is_valid()); 1040 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. 1041 new_parsed->scheme.reset(); 1042 AdjustComponents(delta, new_parsed); 1043 } 1044 1045 return url_string; 1046} 1047 1048} // namespace 1049 1050const FormatUrlType kFormatUrlOmitNothing = 0; 1051const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 1052const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 1053const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 1054const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | 1055 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; 1056 1057// TODO(viettrungluu): We don't want non-POD globals; change this. 1058std::multiset<int> explicitly_allowed_ports; 1059 1060GURL FilePathToFileURL(const FilePath& path) { 1061 // Produce a URL like "file:///C:/foo" for a regular file, or 1062 // "file://///server/path" for UNC. The URL canonicalizer will fix up the 1063 // latter case to be the canonical UNC form: "file://server/path" 1064 FilePath::StringType url_string(kFileURLPrefix); 1065 url_string.append(path.value()); 1066 1067 // Now do replacement of some characters. Since we assume the input is a 1068 // literal filename, anything the URL parser might consider special should 1069 // be escaped here. 1070 1071 // must be the first substitution since others will introduce percents as the 1072 // escape character 1073 ReplaceSubstringsAfterOffset(&url_string, 0, 1074 FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25")); 1075 1076 // semicolon is supposed to be some kind of separator according to RFC 2396 1077 ReplaceSubstringsAfterOffset(&url_string, 0, 1078 FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B")); 1079 1080 ReplaceSubstringsAfterOffset(&url_string, 0, 1081 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); 1082 1083#if defined(OS_POSIX) 1084 ReplaceSubstringsAfterOffset(&url_string, 0, 1085 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); 1086#endif 1087 1088 return GURL(url_string); 1089} 1090 1091std::wstring GetSpecificHeader(const std::wstring& headers, 1092 const std::wstring& name) { 1093 return GetSpecificHeaderT(headers, name); 1094} 1095 1096std::string GetSpecificHeader(const std::string& headers, 1097 const std::string& name) { 1098 return GetSpecificHeaderT(headers, name); 1099} 1100 1101bool DecodeCharset(const std::string& input, 1102 std::string* decoded_charset, 1103 std::string* value) { 1104 StringTokenizer t(input, "'"); 1105 t.set_options(StringTokenizer::RETURN_DELIMS); 1106 std::string temp_charset; 1107 std::string temp_value; 1108 int numDelimsSeen = 0; 1109 while (t.GetNext()) { 1110 if (t.token_is_delim()) { 1111 ++numDelimsSeen; 1112 continue; 1113 } else { 1114 switch (numDelimsSeen) { 1115 case 0: 1116 temp_charset = t.token(); 1117 break; 1118 case 1: 1119 // Language is ignored. 1120 break; 1121 case 2: 1122 temp_value = t.token(); 1123 break; 1124 default: 1125 return false; 1126 } 1127 } 1128 } 1129 if (numDelimsSeen != 2) 1130 return false; 1131 if (temp_charset.empty() || temp_value.empty()) 1132 return false; 1133 decoded_charset->swap(temp_charset); 1134 value->swap(temp_value); 1135 return true; 1136} 1137 1138std::string GetFileNameFromCD(const std::string& header, 1139 const std::string& referrer_charset) { 1140 std::string decoded; 1141 std::string param_value = GetHeaderParamValue(header, "filename*", 1142 QuoteRule::KEEP_OUTER_QUOTES); 1143 if (!param_value.empty()) { 1144 if (param_value.find('"') == std::string::npos) { 1145 std::string charset; 1146 std::string value; 1147 if (DecodeCharset(param_value, &charset, &value)) { 1148 // RFC 5987 value should be ASCII-only. 1149 if (!IsStringASCII(value)) 1150 return std::string(); 1151 std::string tmp = UnescapeURLComponent( 1152 value, 1153 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 1154 if (base::ConvertToUtf8AndNormalize(tmp, charset, &decoded)) 1155 return decoded; 1156 } 1157 } 1158 } 1159 param_value = GetHeaderParamValue(header, "filename", 1160 QuoteRule::REMOVE_OUTER_QUOTES); 1161 if (param_value.empty()) { 1162 // Some servers use 'name' parameter. 1163 param_value = GetHeaderParamValue(header, "name", 1164 QuoteRule::REMOVE_OUTER_QUOTES); 1165 } 1166 if (param_value.empty()) 1167 return std::string(); 1168 if (DecodeParamValue(param_value, referrer_charset, &decoded)) 1169 return decoded; 1170 return std::string(); 1171} 1172 1173std::wstring GetHeaderParamValue(const std::wstring& field, 1174 const std::wstring& param_name, 1175 QuoteRule::Type quote_rule) { 1176 return GetHeaderParamValueT(field, param_name, quote_rule); 1177} 1178 1179std::string GetHeaderParamValue(const std::string& field, 1180 const std::string& param_name, 1181 QuoteRule::Type quote_rule) { 1182 return GetHeaderParamValueT(field, param_name, quote_rule); 1183} 1184 1185// TODO(brettw) bug 734373: check the scripts for each host component and 1186// don't un-IDN-ize if there is more than one. Alternatively, only IDN for 1187// scripts that the user has installed. For now, just put the entire 1188// path through IDN. Maybe this feature can be implemented in ICU itself? 1189// 1190// We may want to skip this step in the case of file URLs to allow unicode 1191// UNC hostnames regardless of encodings. 1192std::wstring IDNToUnicode(const char* host, 1193 size_t host_len, 1194 const std::wstring& languages, 1195 size_t* offset_for_adjustment) { 1196 // Convert the ASCII input to a wide string for ICU. 1197 string16 input16; 1198 input16.reserve(host_len); 1199 input16.insert(input16.end(), host, host + host_len); 1200 1201 string16 out16; 1202 size_t output_offset = offset_for_adjustment ? 1203 *offset_for_adjustment : std::wstring::npos; 1204 1205 // Do each component of the host separately, since we enforce script matching 1206 // on a per-component basis. 1207 for (size_t component_start = 0, component_end; 1208 component_start < input16.length(); 1209 component_start = component_end + 1) { 1210 // Find the end of the component. 1211 component_end = input16.find('.', component_start); 1212 if (component_end == string16::npos) 1213 component_end = input16.length(); // For getting the last component. 1214 size_t component_length = component_end - component_start; 1215 1216 size_t output_component_start = out16.length(); 1217 bool converted_idn = false; 1218 if (component_end > component_start) { 1219 // Add the substring that we just found. 1220 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, 1221 component_length, languages, &out16); 1222 } 1223 size_t output_component_length = out16.length() - output_component_start; 1224 1225 if ((output_offset != std::wstring::npos) && 1226 (*offset_for_adjustment > component_start)) { 1227 if ((*offset_for_adjustment < component_end) && converted_idn) 1228 output_offset = std::wstring::npos; 1229 else 1230 output_offset += output_component_length - component_length; 1231 } 1232 1233 // Need to add the dot we just found (if we found one). 1234 if (component_end < input16.length()) 1235 out16.push_back('.'); 1236 } 1237 1238 if (offset_for_adjustment) 1239 *offset_for_adjustment = output_offset; 1240 1241 return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment); 1242} 1243 1244std::string CanonicalizeHost(const std::string& host, 1245 url_canon::CanonHostInfo* host_info) { 1246 // Try to canonicalize the host. 1247 const url_parse::Component raw_host_component( 1248 0, static_cast<int>(host.length())); 1249 std::string canon_host; 1250 url_canon::StdStringCanonOutput canon_host_output(&canon_host); 1251 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, 1252 &canon_host_output, host_info); 1253 1254 if (host_info->out_host.is_nonempty() && 1255 host_info->family != url_canon::CanonHostInfo::BROKEN) { 1256 // Success! Assert that there's no extra garbage. 1257 canon_host_output.Complete(); 1258 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); 1259 } else { 1260 // Empty host, or canonicalization failed. We'll return empty. 1261 canon_host.clear(); 1262 } 1263 1264 return canon_host; 1265} 1266 1267std::string CanonicalizeHost(const std::wstring& host, 1268 url_canon::CanonHostInfo* host_info) { 1269 std::string converted_host; 1270 WideToUTF8(host.c_str(), host.length(), &converted_host); 1271 return CanonicalizeHost(converted_host, host_info); 1272} 1273 1274std::string GetDirectoryListingHeader(const string16& title) { 1275 static const base::StringPiece header( 1276 NetModule::GetResource(IDR_DIR_HEADER_HTML)); 1277 // This can be null in unit tests. 1278 DLOG_IF(WARNING, header.empty()) << 1279 "Missing resource: directory listing header"; 1280 1281 std::string result; 1282 if (!header.empty()) 1283 result.assign(header.data(), header.size()); 1284 1285 result.append("<script>start("); 1286 base::JsonDoubleQuote(title, true, &result); 1287 result.append(");</script>\n"); 1288 1289 return result; 1290} 1291 1292inline bool IsHostCharAlpha(char c) { 1293 // We can just check lowercase because uppercase characters have already been 1294 // normalized. 1295 return (c >= 'a') && (c <= 'z'); 1296} 1297 1298inline bool IsHostCharDigit(char c) { 1299 return (c >= '0') && (c <= '9'); 1300} 1301 1302bool IsCanonicalizedHostCompliant(const std::string& host, 1303 const std::string& desired_tld) { 1304 if (host.empty()) 1305 return false; 1306 1307 bool in_component = false; 1308 bool most_recent_component_started_alpha = false; 1309 bool last_char_was_hyphen_or_underscore = false; 1310 1311 for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) { 1312 const char c = *i; 1313 if (!in_component) { 1314 most_recent_component_started_alpha = IsHostCharAlpha(c); 1315 if (!most_recent_component_started_alpha && !IsHostCharDigit(c)) 1316 return false; 1317 in_component = true; 1318 } else { 1319 if (c == '.') { 1320 if (last_char_was_hyphen_or_underscore) 1321 return false; 1322 in_component = false; 1323 } else if (IsHostCharAlpha(c) || IsHostCharDigit(c)) { 1324 last_char_was_hyphen_or_underscore = false; 1325 } else if ((c == '-') || (c == '_')) { 1326 last_char_was_hyphen_or_underscore = true; 1327 } else { 1328 return false; 1329 } 1330 } 1331 } 1332 1333 return most_recent_component_started_alpha || 1334 (!desired_tld.empty() && IsHostCharAlpha(desired_tld[0])); 1335} 1336 1337std::string GetDirectoryListingEntry(const string16& name, 1338 const std::string& raw_bytes, 1339 bool is_dir, 1340 int64 size, 1341 Time modified) { 1342 std::string result; 1343 result.append("<script>addRow("); 1344 base::JsonDoubleQuote(name, true, &result); 1345 result.append(","); 1346 if (raw_bytes.empty()) { 1347 base::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)), 1348 true, &result); 1349 } else { 1350 base::JsonDoubleQuote(EscapePath(raw_bytes), true, &result); 1351 } 1352 if (is_dir) { 1353 result.append(",1,"); 1354 } else { 1355 result.append(",0,"); 1356 } 1357 1358 base::JsonDoubleQuote( 1359 FormatBytes(size, GetByteDisplayUnits(size), true), 1360 true, 1361 &result); 1362 1363 result.append(","); 1364 1365 string16 modified_str; 1366 // |modified| can be NULL in FTP listings. 1367 if (!modified.is_null()) { 1368 modified_str = base::TimeFormatShortDateAndTime(modified); 1369 } 1370 base::JsonDoubleQuote(modified_str, true, &result); 1371 1372 result.append(");</script>\n"); 1373 1374 return result; 1375} 1376 1377string16 StripWWW(const string16& text) { 1378 const string16 www(ASCIIToUTF16("www.")); 1379 return (text.compare(0, www.length(), www) == 0) ? 1380 text.substr(www.length()) : text; 1381} 1382 1383string16 GetSuggestedFilename(const GURL& url, 1384 const std::string& content_disposition, 1385 const std::string& referrer_charset, 1386 const string16& default_name) { 1387 // TODO: this function to be updated to match the httpbis recommendations. 1388 // Talk to abarth for the latest news. 1389 1390 // We don't translate this fallback string, "download". If localization is 1391 // needed, the caller should provide localized fallback default_name. 1392 static const char* kFinalFallbackName = "download"; 1393 1394 // about: and data: URLs don't have file names, but esp. data: URLs may 1395 // contain parts that look like ones (i.e., contain a slash). 1396 // Therefore we don't attempt to divine a file name out of them. 1397 if (url.SchemeIs("about") || url.SchemeIs("data")) { 1398 return default_name.empty() ? ASCIIToUTF16(kFinalFallbackName) 1399 : default_name; 1400 } 1401 1402 std::string filename = GetFileNameFromCD(content_disposition, 1403 referrer_charset); 1404 1405 if (!filename.empty()) { 1406 // Replace any path information the server may have sent, by changing 1407 // path separators with underscores. 1408 ReplaceSubstringsAfterOffset(&filename, 0, "/", "_"); 1409 ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_"); 1410 1411 // Next, remove "." from the beginning and end of the file name to avoid 1412 // tricks with hidden files, "..", and "." 1413 TrimString(filename, ".", &filename); 1414 } 1415 if (filename.empty()) { 1416 if (url.is_valid()) { 1417 const std::string unescaped_url_filename = UnescapeURLComponent( 1418 url.ExtractFileName(), 1419 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); 1420 1421 // The URL's path should be escaped UTF-8, but may not be. 1422 std::string decoded_filename = unescaped_url_filename; 1423 if (!IsStringASCII(decoded_filename)) { 1424 bool ignore; 1425 // TODO(jshin): this is probably not robust enough. To be sure, we 1426 // need encoding detection. 1427 DecodeWord(unescaped_url_filename, referrer_charset, &ignore, 1428 &decoded_filename); 1429 } 1430 1431 filename = decoded_filename; 1432 } 1433 } 1434 1435#if defined(OS_WIN) 1436 { // Handle CreateFile() stripping trailing dots and spaces on filenames 1437 // http://support.microsoft.com/kb/115827 1438 std::string::size_type pos = filename.find_last_not_of(" ."); 1439 if (pos == std::string::npos) 1440 filename.resize(0); 1441 else 1442 filename.resize(++pos); 1443 } 1444#endif 1445 // Trim '.' once more. 1446 TrimString(filename, ".", &filename); 1447 1448 // If there's no filename or it gets trimed to be empty, use 1449 // the URL hostname or default_name 1450 if (filename.empty()) { 1451 if (!default_name.empty()) { 1452 return default_name; 1453 } else if (url.is_valid()) { 1454 // Some schemes (e.g. file) do not have a hostname. Even though it's 1455 // not likely to reach here, let's hardcode the last fallback name. 1456 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) 1457 filename = url.host().empty() ? kFinalFallbackName : url.host(); 1458 } else { 1459 NOTREACHED(); 1460 } 1461 } 1462 1463#if defined(OS_WIN) 1464 string16 path = UTF8ToUTF16(filename); 1465 file_util::ReplaceIllegalCharactersInPath(&path, '-'); 1466 return path; 1467#else 1468 std::string path = filename; 1469 file_util::ReplaceIllegalCharactersInPath(&path, '-'); 1470 return UTF8ToUTF16(path); 1471#endif 1472} 1473 1474bool IsPortAllowedByDefault(int port) { 1475 int array_size = arraysize(kRestrictedPorts); 1476 for (int i = 0; i < array_size; i++) { 1477 if (kRestrictedPorts[i] == port) { 1478 return false; 1479 } 1480 } 1481 return true; 1482} 1483 1484bool IsPortAllowedByFtp(int port) { 1485 int array_size = arraysize(kAllowedFtpPorts); 1486 for (int i = 0; i < array_size; i++) { 1487 if (kAllowedFtpPorts[i] == port) { 1488 return true; 1489 } 1490 } 1491 // Port not explicitly allowed by FTP, so return the default restrictions. 1492 return IsPortAllowedByDefault(port); 1493} 1494 1495bool IsPortAllowedByOverride(int port) { 1496 if (explicitly_allowed_ports.empty()) 1497 return false; 1498 1499 return explicitly_allowed_ports.count(port) > 0; 1500} 1501 1502int SetNonBlocking(int fd) { 1503#if defined(OS_WIN) 1504 unsigned long no_block = 1; 1505 return ioctlsocket(fd, FIONBIO, &no_block); 1506#elif defined(OS_POSIX) 1507 int flags = fcntl(fd, F_GETFL, 0); 1508 if (-1 == flags) 1509 return flags; 1510 return fcntl(fd, F_SETFL, flags | O_NONBLOCK); 1511#endif 1512} 1513 1514bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, 1515 std::string::const_iterator host_and_port_end, 1516 std::string* host, 1517 int* port) { 1518 if (host_and_port_begin >= host_and_port_end) 1519 return false; 1520 1521 // When using url_parse, we use char*. 1522 const char* auth_begin = &(*host_and_port_begin); 1523 int auth_len = host_and_port_end - host_and_port_begin; 1524 1525 url_parse::Component auth_component(0, auth_len); 1526 url_parse::Component username_component; 1527 url_parse::Component password_component; 1528 url_parse::Component hostname_component; 1529 url_parse::Component port_component; 1530 1531 url_parse::ParseAuthority(auth_begin, auth_component, &username_component, 1532 &password_component, &hostname_component, &port_component); 1533 1534 // There shouldn't be a username/password. 1535 if (username_component.is_valid() || password_component.is_valid()) 1536 return false; 1537 1538 if (!hostname_component.is_nonempty()) 1539 return false; // Failed parsing. 1540 1541 int parsed_port_number = -1; 1542 if (port_component.is_nonempty()) { 1543 parsed_port_number = url_parse::ParsePort(auth_begin, port_component); 1544 1545 // If parsing failed, port_number will be either PORT_INVALID or 1546 // PORT_UNSPECIFIED, both of which are negative. 1547 if (parsed_port_number < 0) 1548 return false; // Failed parsing the port number. 1549 } 1550 1551 if (port_component.len == 0) 1552 return false; // Reject inputs like "foo:" 1553 1554 // Pass results back to caller. 1555 host->assign(auth_begin + hostname_component.begin, hostname_component.len); 1556 *port = parsed_port_number; 1557 1558 return true; // Success. 1559} 1560 1561bool ParseHostAndPort(const std::string& host_and_port, 1562 std::string* host, 1563 int* port) { 1564 return ParseHostAndPort( 1565 host_and_port.begin(), host_and_port.end(), host, port); 1566} 1567 1568std::string GetHostAndPort(const GURL& url) { 1569 // For IPv6 literals, GURL::host() already includes the brackets so it is 1570 // safe to just append a colon. 1571 return base::StringPrintf("%s:%d", url.host().c_str(), 1572 url.EffectiveIntPort()); 1573} 1574 1575std::string GetHostAndOptionalPort(const GURL& url) { 1576 // For IPv6 literals, GURL::host() already includes the brackets 1577 // so it is safe to just append a colon. 1578 if (url.has_port()) 1579 return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str()); 1580 return url.host(); 1581} 1582 1583std::string NetAddressToString(const struct addrinfo* net_address) { 1584 return NetAddressToString(net_address->ai_addr, net_address->ai_addrlen); 1585} 1586 1587std::string NetAddressToString(const struct sockaddr* net_address, 1588 socklen_t address_len) { 1589#if defined(OS_WIN) 1590 EnsureWinsockInit(); 1591#endif 1592 1593 // This buffer is large enough to fit the biggest IPv6 string. 1594 char buffer[INET6_ADDRSTRLEN]; 1595 1596 int result = getnameinfo(net_address, address_len, buffer, sizeof(buffer), 1597 NULL, 0, NI_NUMERICHOST); 1598 1599 if (result != 0) { 1600 DVLOG(1) << "getnameinfo() failed with " << result << ": " 1601 << gai_strerror(result); 1602 buffer[0] = '\0'; 1603 } 1604 return std::string(buffer); 1605} 1606 1607std::string NetAddressToStringWithPort(const struct addrinfo* net_address) { 1608 return NetAddressToStringWithPort( 1609 net_address->ai_addr, net_address->ai_addrlen); 1610} 1611std::string NetAddressToStringWithPort(const struct sockaddr* net_address, 1612 socklen_t address_len) { 1613 std::string ip_address_string = NetAddressToString(net_address, address_len); 1614 if (ip_address_string.empty()) 1615 return std::string(); // Failed. 1616 1617 int port = GetPortFromSockaddr(net_address, address_len); 1618 1619 if (ip_address_string.find(':') != std::string::npos) { 1620 // Surround with square brackets to avoid ambiguity. 1621 return base::StringPrintf("[%s]:%d", ip_address_string.c_str(), port); 1622 } 1623 1624 return base::StringPrintf("%s:%d", ip_address_string.c_str(), port); 1625} 1626 1627std::string GetHostName() { 1628#if defined(OS_WIN) 1629 EnsureWinsockInit(); 1630#endif 1631 1632 // Host names are limited to 255 bytes. 1633 char buffer[256]; 1634 int result = gethostname(buffer, sizeof(buffer)); 1635 if (result != 0) { 1636 DVLOG(1) << "gethostname() failed with " << result; 1637 buffer[0] = '\0'; 1638 } 1639 return std::string(buffer); 1640} 1641 1642void GetIdentityFromURL(const GURL& url, 1643 string16* username, 1644 string16* password) { 1645 UnescapeRule::Type flags = 1646 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; 1647 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); 1648 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); 1649} 1650 1651std::string GetHostOrSpecFromURL(const GURL& url) { 1652 return url.has_host() ? net::TrimEndingDot(url.host()) : url.spec(); 1653} 1654 1655void AppendFormattedHost(const GURL& url, 1656 const std::wstring& languages, 1657 std::wstring* output, 1658 url_parse::Parsed* new_parsed, 1659 size_t* offset_for_adjustment) { 1660 DCHECK(output); 1661 const url_parse::Component& host = 1662 url.parsed_for_possibly_invalid_spec().host; 1663 1664 if (host.is_nonempty()) { 1665 // Handle possible IDN in the host name. 1666 int new_host_begin = static_cast<int>(output->length()); 1667 if (new_parsed) 1668 new_parsed->host.begin = new_host_begin; 1669 size_t offset_past_current_output = 1670 (!offset_for_adjustment || 1671 (*offset_for_adjustment == std::wstring::npos) || 1672 (*offset_for_adjustment < output->length())) ? 1673 std::wstring::npos : (*offset_for_adjustment - output->length()); 1674 size_t* offset_into_host = 1675 (offset_past_current_output >= static_cast<size_t>(host.len)) ? 1676 NULL : &offset_past_current_output; 1677 1678 const std::string& spec = url.possibly_invalid_spec(); 1679 DCHECK(host.begin >= 0 && 1680 ((spec.length() == 0 && host.begin == 0) || 1681 host.begin < static_cast<int>(spec.length()))); 1682 output->append(net::IDNToUnicode(&spec[host.begin], 1683 static_cast<size_t>(host.len), languages, offset_into_host)); 1684 1685 int new_host_len = static_cast<int>(output->length()) - new_host_begin; 1686 if (new_parsed) 1687 new_parsed->host.len = new_host_len; 1688 if (offset_into_host) { 1689 *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ? 1690 std::wstring::npos : (new_host_begin + *offset_into_host); 1691 } else if (offset_past_current_output != std::wstring::npos) { 1692 *offset_for_adjustment += new_host_len - host.len; 1693 } 1694 } else if (new_parsed) { 1695 new_parsed->host.reset(); 1696 } 1697} 1698 1699// TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. 1700string16 FormatUrl(const GURL& url, 1701 const std::string& languages, 1702 FormatUrlTypes format_types, 1703 UnescapeRule::Type unescape_rules, 1704 url_parse::Parsed* new_parsed, 1705 size_t* prefix_end, 1706 size_t* offset_for_adjustment) { 1707 return WideToUTF16Hack( 1708 FormatUrlInternal(url, ASCIIToWide(languages), format_types, 1709 unescape_rules, new_parsed, prefix_end, 1710 offset_for_adjustment)); 1711} 1712 1713bool CanStripTrailingSlash(const GURL& url) { 1714 // Omit the path only for standard, non-file URLs with nothing but "/" after 1715 // the hostname. 1716 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && 1717 !url.has_ref() && url.path() == "/"; 1718} 1719 1720GURL SimplifyUrlForRequest(const GURL& url) { 1721 DCHECK(url.is_valid()); 1722 GURL::Replacements replacements; 1723 replacements.ClearUsername(); 1724 replacements.ClearPassword(); 1725 replacements.ClearRef(); 1726 return url.ReplaceComponents(replacements); 1727} 1728 1729// Specifies a comma separated list of port numbers that should be accepted 1730// despite bans. If the string is invalid no allowed ports are stored. 1731void SetExplicitlyAllowedPorts(const std::string& allowed_ports) { 1732 if (allowed_ports.empty()) 1733 return; 1734 1735 std::multiset<int> ports; 1736 size_t last = 0; 1737 size_t size = allowed_ports.size(); 1738 // The comma delimiter. 1739 const std::string::value_type kComma = ','; 1740 1741 // Overflow is still possible for evil user inputs. 1742 for (size_t i = 0; i <= size; ++i) { 1743 // The string should be composed of only digits and commas. 1744 if (i != size && !IsAsciiDigit(allowed_ports[i]) && 1745 (allowed_ports[i] != kComma)) 1746 return; 1747 if (i == size || allowed_ports[i] == kComma) { 1748 if (i > last) { 1749 int port; 1750 base::StringToInt(allowed_ports.begin() + last, 1751 allowed_ports.begin() + i, 1752 &port); 1753 ports.insert(port); 1754 } 1755 last = i + 1; 1756 } 1757 } 1758 explicitly_allowed_ports = ports; 1759} 1760 1761ScopedPortException::ScopedPortException(int port) : port_(port) { 1762 explicitly_allowed_ports.insert(port); 1763} 1764 1765ScopedPortException::~ScopedPortException() { 1766 std::multiset<int>::iterator it = explicitly_allowed_ports.find(port_); 1767 if (it != explicitly_allowed_ports.end()) 1768 explicitly_allowed_ports.erase(it); 1769 else 1770 NOTREACHED(); 1771} 1772 1773enum IPv6SupportStatus { 1774 IPV6_CANNOT_CREATE_SOCKETS, 1775 IPV6_CAN_CREATE_SOCKETS, 1776 IPV6_GETIFADDRS_FAILED, 1777 IPV6_GLOBAL_ADDRESS_MISSING, 1778 IPV6_GLOBAL_ADDRESS_PRESENT, 1779 IPV6_INTERFACE_ARRAY_TOO_SHORT, 1780 IPV6_SUPPORT_MAX // Bounding values for enumeration. 1781}; 1782 1783static void IPv6SupportResults(IPv6SupportStatus result) { 1784 static bool run_once = false; 1785 if (!run_once) { 1786 run_once = true; 1787 UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status", result, IPV6_SUPPORT_MAX); 1788 } else { 1789 UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status_retest", result, 1790 IPV6_SUPPORT_MAX); 1791 } 1792} 1793 1794// TODO(jar): The following is a simple estimate of IPv6 support. We may need 1795// to do a test resolution, and a test connection, to REALLY verify support. 1796// static 1797bool IPv6Supported() { 1798#ifdef ANDROID 1799 // Android does not have the ifaddrs.h header 1800 return false; 1801#elif defined(OS_POSIX) 1802 int test_socket = socket(AF_INET6, SOCK_STREAM, 0); 1803 if (test_socket == -1) { 1804 IPv6SupportResults(IPV6_CANNOT_CREATE_SOCKETS); 1805 return false; 1806 } 1807 close(test_socket); 1808 1809 // Check to see if any interface has a IPv6 address. 1810 struct ifaddrs* interface_addr = NULL; 1811 int rv = getifaddrs(&interface_addr); 1812 if (rv != 0) { 1813 IPv6SupportResults(IPV6_GETIFADDRS_FAILED); 1814 return true; // Don't yet block IPv6. 1815 } 1816 1817 bool found_ipv6 = false; 1818 for (struct ifaddrs* interface = interface_addr; 1819 interface != NULL; 1820 interface = interface->ifa_next) { 1821 if (!(IFF_UP & interface->ifa_flags)) 1822 continue; 1823 if (IFF_LOOPBACK & interface->ifa_flags) 1824 continue; 1825 struct sockaddr* addr = interface->ifa_addr; 1826 if (!addr) 1827 continue; 1828 if (addr->sa_family != AF_INET6) 1829 continue; 1830 // Safe cast since this is AF_INET6. 1831 struct sockaddr_in6* addr_in6 = 1832 reinterpret_cast<struct sockaddr_in6*>(addr); 1833 struct in6_addr* sin6_addr = &addr_in6->sin6_addr; 1834 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr)) 1835 continue; 1836 found_ipv6 = true; 1837 break; 1838 } 1839 freeifaddrs(interface_addr); 1840 if (!found_ipv6) { 1841 IPv6SupportResults(IPV6_GLOBAL_ADDRESS_MISSING); 1842 return false; 1843 } 1844 1845 IPv6SupportResults(IPV6_GLOBAL_ADDRESS_PRESENT); 1846 return true; 1847#elif defined(OS_WIN) 1848 EnsureWinsockInit(); 1849 SOCKET test_socket = socket(AF_INET6, SOCK_STREAM, 0); 1850 if (test_socket == INVALID_SOCKET) { 1851 IPv6SupportResults(IPV6_CANNOT_CREATE_SOCKETS); 1852 return false; 1853 } 1854 closesocket(test_socket); 1855 1856 // TODO(jar): Bug 40851: The remainder of probe is not working. 1857 IPv6SupportResults(IPV6_CAN_CREATE_SOCKETS); // Record status. 1858 return true; // Don't disable IPv6 yet. 1859 1860 // Check to see if any interface has a IPv6 address. 1861 // Note: The original IPv6 socket can't be used here, as WSAIoctl() will fail. 1862 test_socket = socket(AF_INET, SOCK_STREAM, 0); 1863 DCHECK(test_socket != INVALID_SOCKET); 1864 INTERFACE_INFO interfaces[128]; 1865 DWORD bytes_written = 0; 1866 int rv = WSAIoctl(test_socket, SIO_GET_INTERFACE_LIST, NULL, 0, interfaces, 1867 sizeof(interfaces), &bytes_written, NULL, NULL); 1868 closesocket(test_socket); 1869 1870 if (0 != rv) { 1871 if (WSAGetLastError() == WSAEFAULT) 1872 IPv6SupportResults(IPV6_INTERFACE_ARRAY_TOO_SHORT); 1873 else 1874 IPv6SupportResults(IPV6_GETIFADDRS_FAILED); 1875 return true; // Don't yet block IPv6. 1876 } 1877 size_t interface_count = bytes_written / sizeof(interfaces[0]); 1878 for (size_t i = 0; i < interface_count; ++i) { 1879 INTERFACE_INFO* interface = &interfaces[i]; 1880 if (!(IFF_UP & interface->iiFlags)) 1881 continue; 1882 if (IFF_LOOPBACK & interface->iiFlags) 1883 continue; 1884 sockaddr* addr = &interface->iiAddress.Address; 1885 if (addr->sa_family != AF_INET6) 1886 continue; 1887 struct in6_addr* sin6_addr = &interface->iiAddress.AddressIn6.sin6_addr; 1888 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr)) 1889 continue; 1890 IPv6SupportResults(IPV6_GLOBAL_ADDRESS_PRESENT); 1891 return true; 1892 } 1893 1894 IPv6SupportResults(IPV6_GLOBAL_ADDRESS_MISSING); 1895 return false; 1896#else 1897 NOTIMPLEMENTED(); 1898 return true; 1899#endif // defined(various platforms) 1900} 1901 1902bool HaveOnlyLoopbackAddresses() { 1903#if defined(ANDROID) 1904 // Android has no <ifaddrs.h> 1905 return false; 1906#elif defined(OS_POSIX) 1907 struct ifaddrs* interface_addr = NULL; 1908 int rv = getifaddrs(&interface_addr); 1909 if (rv != 0) { 1910 DVLOG(1) << "getifaddrs() failed with errno = " << errno; 1911 return false; 1912 } 1913 1914 bool result = true; 1915 for (struct ifaddrs* interface = interface_addr; 1916 interface != NULL; 1917 interface = interface->ifa_next) { 1918 if (!(IFF_UP & interface->ifa_flags)) 1919 continue; 1920 if (IFF_LOOPBACK & interface->ifa_flags) 1921 continue; 1922 const struct sockaddr* addr = interface->ifa_addr; 1923 if (!addr) 1924 continue; 1925 if (addr->sa_family == AF_INET6) { 1926 // Safe cast since this is AF_INET6. 1927 const struct sockaddr_in6* addr_in6 = 1928 reinterpret_cast<const struct sockaddr_in6*>(addr); 1929 const struct in6_addr* sin6_addr = &addr_in6->sin6_addr; 1930 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr)) 1931 continue; 1932 } 1933 if (addr->sa_family != AF_INET6 && addr->sa_family != AF_INET) 1934 continue; 1935 1936 result = false; 1937 break; 1938 } 1939 freeifaddrs(interface_addr); 1940 return result; 1941#else 1942 NOTIMPLEMENTED(); 1943 return false; 1944#endif // defined(various platforms) 1945} 1946 1947bool ParseIPLiteralToNumber(const std::string& ip_literal, 1948 IPAddressNumber* ip_number) { 1949 // |ip_literal| could be either a IPv4 or an IPv6 literal. If it contains 1950 // a colon however, it must be an IPv6 address. 1951 if (ip_literal.find(':') != std::string::npos) { 1952 // GURL expects IPv6 hostnames to be surrounded with brackets. 1953 std::string host_brackets = "[" + ip_literal + "]"; 1954 url_parse::Component host_comp(0, host_brackets.size()); 1955 1956 // Try parsing the hostname as an IPv6 literal. 1957 ip_number->resize(16); // 128 bits. 1958 return url_canon::IPv6AddressToNumber(host_brackets.data(), 1959 host_comp, 1960 &(*ip_number)[0]); 1961 } 1962 1963 // Otherwise the string is an IPv4 address. 1964 ip_number->resize(4); // 32 bits. 1965 url_parse::Component host_comp(0, ip_literal.size()); 1966 int num_components; 1967 url_canon::CanonHostInfo::Family family = url_canon::IPv4AddressToNumber( 1968 ip_literal.data(), host_comp, &(*ip_number)[0], &num_components); 1969 return family == url_canon::CanonHostInfo::IPV4; 1970} 1971 1972IPAddressNumber ConvertIPv4NumberToIPv6Number( 1973 const IPAddressNumber& ipv4_number) { 1974 DCHECK(ipv4_number.size() == 4); 1975 1976 // IPv4-mapped addresses are formed by: 1977 // <80 bits of zeros> + <16 bits of ones> + <32-bit IPv4 address>. 1978 IPAddressNumber ipv6_number; 1979 ipv6_number.reserve(16); 1980 ipv6_number.insert(ipv6_number.end(), 10, 0); 1981 ipv6_number.push_back(0xFF); 1982 ipv6_number.push_back(0xFF); 1983 ipv6_number.insert(ipv6_number.end(), ipv4_number.begin(), ipv4_number.end()); 1984 return ipv6_number; 1985} 1986 1987bool ParseCIDRBlock(const std::string& cidr_literal, 1988 IPAddressNumber* ip_number, 1989 size_t* prefix_length_in_bits) { 1990 // We expect CIDR notation to match one of these two templates: 1991 // <IPv4-literal> "/" <number of bits> 1992 // <IPv6-literal> "/" <number of bits> 1993 1994 std::vector<std::string> parts; 1995 base::SplitString(cidr_literal, '/', &parts); 1996 if (parts.size() != 2) 1997 return false; 1998 1999 // Parse the IP address. 2000 if (!ParseIPLiteralToNumber(parts[0], ip_number)) 2001 return false; 2002 2003 // Parse the prefix length. 2004 int number_of_bits = -1; 2005 if (!base::StringToInt(parts[1], &number_of_bits)) 2006 return false; 2007 2008 // Make sure the prefix length is in a valid range. 2009 if (number_of_bits < 0 || 2010 number_of_bits > static_cast<int>(ip_number->size() * 8)) 2011 return false; 2012 2013 *prefix_length_in_bits = static_cast<size_t>(number_of_bits); 2014 return true; 2015} 2016 2017bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number, 2018 const IPAddressNumber& ip_prefix, 2019 size_t prefix_length_in_bits) { 2020 // Both the input IP address and the prefix IP address should be 2021 // either IPv4 or IPv6. 2022 DCHECK(ip_number.size() == 4 || ip_number.size() == 16); 2023 DCHECK(ip_prefix.size() == 4 || ip_prefix.size() == 16); 2024 2025 DCHECK_LE(prefix_length_in_bits, ip_prefix.size() * 8); 2026 2027 // In case we have an IPv6 / IPv4 mismatch, convert the IPv4 addresses to 2028 // IPv6 addresses in order to do the comparison. 2029 if (ip_number.size() != ip_prefix.size()) { 2030 if (ip_number.size() == 4) { 2031 return IPNumberMatchesPrefix(ConvertIPv4NumberToIPv6Number(ip_number), 2032 ip_prefix, prefix_length_in_bits); 2033 } 2034 return IPNumberMatchesPrefix(ip_number, 2035 ConvertIPv4NumberToIPv6Number(ip_prefix), 2036 96 + prefix_length_in_bits); 2037 } 2038 2039 // Otherwise we are comparing two IPv4 addresses, or two IPv6 addresses. 2040 // Compare all the bytes that fall entirely within the prefix. 2041 int num_entire_bytes_in_prefix = prefix_length_in_bits / 8; 2042 for (int i = 0; i < num_entire_bytes_in_prefix; ++i) { 2043 if (ip_number[i] != ip_prefix[i]) 2044 return false; 2045 } 2046 2047 // In case the prefix was not a multiple of 8, there will be 1 byte 2048 // which is only partially masked. 2049 int remaining_bits = prefix_length_in_bits % 8; 2050 if (remaining_bits != 0) { 2051 unsigned char mask = 0xFF << (8 - remaining_bits); 2052 int i = num_entire_bytes_in_prefix; 2053 if ((ip_number[i] & mask) != (ip_prefix[i] & mask)) 2054 return false; 2055 } 2056 2057 return true; 2058} 2059 2060// Returns the port field of the sockaddr in |info|. 2061uint16* GetPortFieldFromAddrinfo(struct addrinfo* info) { 2062 const struct addrinfo* const_info = info; 2063 const uint16* port_field = GetPortFieldFromAddrinfo(const_info); 2064 return const_cast<uint16*>(port_field); 2065} 2066 2067const uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info) { 2068 DCHECK(info); 2069 const struct sockaddr* address = info->ai_addr; 2070 DCHECK(address); 2071 DCHECK_EQ(info->ai_family, address->sa_family); 2072 return GetPortFieldFromSockaddr(address, info->ai_addrlen); 2073} 2074 2075int GetPortFromAddrinfo(const struct addrinfo* info) { 2076 const uint16* port_field = GetPortFieldFromAddrinfo(info); 2077 if (!port_field) 2078 return -1; 2079 return ntohs(*port_field); 2080} 2081 2082const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address, 2083 socklen_t address_len) { 2084 if (address->sa_family == AF_INET) { 2085 DCHECK_LE(sizeof(sockaddr_in), static_cast<size_t>(address_len)); 2086 const struct sockaddr_in* sockaddr = 2087 reinterpret_cast<const struct sockaddr_in*>(address); 2088 return &sockaddr->sin_port; 2089 } else if (address->sa_family == AF_INET6) { 2090 DCHECK_LE(sizeof(sockaddr_in6), static_cast<size_t>(address_len)); 2091 const struct sockaddr_in6* sockaddr = 2092 reinterpret_cast<const struct sockaddr_in6*>(address); 2093 return &sockaddr->sin6_port; 2094 } else { 2095 NOTREACHED(); 2096 return NULL; 2097 } 2098} 2099 2100int GetPortFromSockaddr(const struct sockaddr* address, socklen_t address_len) { 2101 const uint16* port_field = GetPortFieldFromSockaddr(address, address_len); 2102 if (!port_field) 2103 return -1; 2104 return ntohs(*port_field); 2105} 2106 2107} // namespace net 2108