gurl.cc revision 558790d6acca3451cf3a6b497803a5f07d0bec58
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifdef WIN32 6#include <windows.h> 7#else 8#include <pthread.h> 9#endif 10 11#include <algorithm> 12#include <ostream> 13 14#include "url/gurl.h" 15 16#include "base/logging.h" 17#include "url/url_canon_stdstring.h" 18#include "url/url_util.h" 19 20namespace { 21 22// External template that can handle initialization of either character type. 23// The input spec is given, and the canonical version will be placed in 24// |*canonical|, along with the parsing of the canonical spec in |*parsed|. 25template<typename STR> 26bool InitCanonical(const STR& input_spec, 27 std::string* canonical, 28 url_parse::Parsed* parsed) { 29 // Reserve enough room in the output for the input, plus some extra so that 30 // we have room if we have to escape a few things without reallocating. 31 canonical->reserve(input_spec.size() + 32); 32 url_canon::StdStringCanonOutput output(canonical); 33 bool success = url_util::Canonicalize( 34 input_spec.data(), static_cast<int>(input_spec.length()), 35 NULL, &output, parsed); 36 37 output.Complete(); // Must be done before using string. 38 return success; 39} 40 41static std::string* empty_string = NULL; 42static GURL* empty_gurl = NULL; 43 44#ifdef WIN32 45 46// Returns a static reference to an empty string for returning a reference 47// when there is no underlying string. 48const std::string& EmptyStringForGURL() { 49 // Avoid static object construction/destruction on startup/shutdown. 50 if (!empty_string) { 51 // Create the string. Be careful that we don't break in the case that this 52 // is being called from multiple threads. Statics are not threadsafe. 53 std::string* new_empty_string = new std::string; 54 if (InterlockedCompareExchangePointer( 55 reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) { 56 // The old value was non-NULL, so no replacement was done. Another 57 // thread did the initialization out from under us. 58 delete new_empty_string; 59 } 60 } 61 return *empty_string; 62} 63 64#else 65 66static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT; 67static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT; 68 69void EmptyStringForGURLOnce(void) { 70 empty_string = new std::string; 71} 72 73const std::string& EmptyStringForGURL() { 74 // Avoid static object construction/destruction on startup/shutdown. 75 pthread_once(&empty_string_once, EmptyStringForGURLOnce); 76 return *empty_string; 77} 78 79#endif // WIN32 80 81} // namespace 82 83GURL::GURL() : is_valid_(false), inner_url_(NULL) { 84} 85 86GURL::GURL(const GURL& other) 87 : spec_(other.spec_), 88 is_valid_(other.is_valid_), 89 parsed_(other.parsed_), 90 inner_url_(NULL) { 91 if (other.inner_url_) 92 inner_url_ = new GURL(*other.inner_url_); 93 // Valid filesystem urls should always have an inner_url_. 94 DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); 95} 96 97GURL::GURL(const std::string& url_string) : inner_url_(NULL) { 98 is_valid_ = InitCanonical(url_string, &spec_, &parsed_); 99 if (is_valid_ && SchemeIsFileSystem()) { 100 inner_url_ = 101 new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true); 102 } 103} 104 105GURL::GURL(const base::string16& url_string) : inner_url_(NULL) { 106 is_valid_ = InitCanonical(url_string, &spec_, &parsed_); 107 if (is_valid_ && SchemeIsFileSystem()) { 108 inner_url_ = 109 new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true); 110 } 111} 112 113GURL::GURL(const char* canonical_spec, size_t canonical_spec_len, 114 const url_parse::Parsed& parsed, bool is_valid) 115 : spec_(canonical_spec, canonical_spec_len), 116 is_valid_(is_valid), 117 parsed_(parsed), 118 inner_url_(NULL) { 119 InitializeFromCanonicalSpec(); 120} 121 122GURL::GURL(std::string canonical_spec, 123 const url_parse::Parsed& parsed, bool is_valid) 124 : is_valid_(is_valid), 125 parsed_(parsed), 126 inner_url_(NULL) { 127 spec_.swap(canonical_spec); 128 InitializeFromCanonicalSpec(); 129} 130 131void GURL::InitializeFromCanonicalSpec() { 132 if (is_valid_ && SchemeIsFileSystem()) { 133 inner_url_ = 134 new GURL(spec_.data(), parsed_.Length(), *parsed_.inner_parsed(), true); 135 } 136 137#ifndef NDEBUG 138 // For testing purposes, check that the parsed canonical URL is identical to 139 // what we would have produced. Skip checking for invalid URLs have no meaning 140 // and we can't always canonicalize then reproducabely. 141 if (is_valid_) { 142 url_parse::Component scheme; 143 if (!url_util::FindAndCompareScheme(spec_.data(), spec_.length(), 144 "filesystem", &scheme) || 145 scheme.begin == parsed_.scheme.begin) { 146 // We can't do this check on the inner_url of a filesystem URL, as 147 // canonical_spec actually points to the start of the outer URL, so we'd 148 // end up with infinite recursion in this constructor. 149 GURL test_url(spec_); 150 151 DCHECK(test_url.is_valid_ == is_valid_); 152 DCHECK(test_url.spec_ == spec_); 153 154 DCHECK(test_url.parsed_.scheme == parsed_.scheme); 155 DCHECK(test_url.parsed_.username == parsed_.username); 156 DCHECK(test_url.parsed_.password == parsed_.password); 157 DCHECK(test_url.parsed_.host == parsed_.host); 158 DCHECK(test_url.parsed_.port == parsed_.port); 159 DCHECK(test_url.parsed_.path == parsed_.path); 160 DCHECK(test_url.parsed_.query == parsed_.query); 161 DCHECK(test_url.parsed_.ref == parsed_.ref); 162 } 163 } 164#endif 165} 166 167GURL::~GURL() { 168 delete inner_url_; 169} 170 171GURL& GURL::operator=(const GURL& other) { 172 spec_ = other.spec_; 173 is_valid_ = other.is_valid_; 174 parsed_ = other.parsed_; 175 delete inner_url_; 176 inner_url_ = NULL; 177 if (other.inner_url_) 178 inner_url_ = new GURL(*other.inner_url_); 179 // Valid filesystem urls should always have an inner_url_. 180 DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); 181 return *this; 182} 183 184const std::string& GURL::spec() const { 185 if (is_valid_ || spec_.empty()) 186 return spec_; 187 188 DCHECK(false) << "Trying to get the spec of an invalid URL!"; 189 return EmptyStringForGURL(); 190} 191 192GURL GURL::Resolve(const std::string& relative) const { 193 return ResolveWithCharsetConverter(relative, NULL); 194} 195GURL GURL::Resolve(const base::string16& relative) const { 196 return ResolveWithCharsetConverter(relative, NULL); 197} 198 199// Note: code duplicated below (it's inconvenient to use a template here). 200GURL GURL::ResolveWithCharsetConverter( 201 const std::string& relative, 202 url_canon::CharsetConverter* charset_converter) const { 203 // Not allowed for invalid URLs. 204 if (!is_valid_) 205 return GURL(); 206 207 GURL result; 208 209 // Reserve enough room in the output for the input, plus some extra so that 210 // we have room if we have to escape a few things without reallocating. 211 result.spec_.reserve(spec_.size() + 32); 212 url_canon::StdStringCanonOutput output(&result.spec_); 213 214 if (!url_util::ResolveRelative( 215 spec_.data(), static_cast<int>(spec_.length()), parsed_, 216 relative.data(), static_cast<int>(relative.length()), 217 charset_converter, &output, &result.parsed_)) { 218 // Error resolving, return an empty URL. 219 return GURL(); 220 } 221 222 output.Complete(); 223 result.is_valid_ = true; 224 if (result.SchemeIsFileSystem()) { 225 result.inner_url_ = new GURL(result.spec_.data(), result.parsed_.Length(), 226 *result.parsed_.inner_parsed(), true); 227 } 228 return result; 229} 230 231// Note: code duplicated above (it's inconvenient to use a template here). 232GURL GURL::ResolveWithCharsetConverter( 233 const base::string16& relative, 234 url_canon::CharsetConverter* charset_converter) const { 235 // Not allowed for invalid URLs. 236 if (!is_valid_) 237 return GURL(); 238 239 GURL result; 240 241 // Reserve enough room in the output for the input, plus some extra so that 242 // we have room if we have to escape a few things without reallocating. 243 result.spec_.reserve(spec_.size() + 32); 244 url_canon::StdStringCanonOutput output(&result.spec_); 245 246 if (!url_util::ResolveRelative( 247 spec_.data(), static_cast<int>(spec_.length()), parsed_, 248 relative.data(), static_cast<int>(relative.length()), 249 charset_converter, &output, &result.parsed_)) { 250 // Error resolving, return an empty URL. 251 return GURL(); 252 } 253 254 output.Complete(); 255 result.is_valid_ = true; 256 if (result.SchemeIsFileSystem()) { 257 result.inner_url_ = new GURL(result.spec_.data(), result.parsed_.Length(), 258 *result.parsed_.inner_parsed(), true); 259 } 260 return result; 261} 262 263// Note: code duplicated below (it's inconvenient to use a template here). 264GURL GURL::ReplaceComponents( 265 const url_canon::Replacements<char>& replacements) const { 266 GURL result; 267 268 // Not allowed for invalid URLs. 269 if (!is_valid_) 270 return GURL(); 271 272 // Reserve enough room in the output for the input, plus some extra so that 273 // we have room if we have to escape a few things without reallocating. 274 result.spec_.reserve(spec_.size() + 32); 275 url_canon::StdStringCanonOutput output(&result.spec_); 276 277 result.is_valid_ = url_util::ReplaceComponents( 278 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements, 279 NULL, &output, &result.parsed_); 280 281 output.Complete(); 282 if (result.is_valid_ && result.SchemeIsFileSystem()) { 283 result.inner_url_ = new GURL(spec_.data(), result.parsed_.Length(), 284 *result.parsed_.inner_parsed(), true); 285 } 286 return result; 287} 288 289// Note: code duplicated above (it's inconvenient to use a template here). 290GURL GURL::ReplaceComponents( 291 const url_canon::Replacements<base::char16>& replacements) const { 292 GURL result; 293 294 // Not allowed for invalid URLs. 295 if (!is_valid_) 296 return GURL(); 297 298 // Reserve enough room in the output for the input, plus some extra so that 299 // we have room if we have to escape a few things without reallocating. 300 result.spec_.reserve(spec_.size() + 32); 301 url_canon::StdStringCanonOutput output(&result.spec_); 302 303 result.is_valid_ = url_util::ReplaceComponents( 304 spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements, 305 NULL, &output, &result.parsed_); 306 307 output.Complete(); 308 if (result.is_valid_ && result.SchemeIsFileSystem()) { 309 result.inner_url_ = new GURL(spec_.data(), result.parsed_.Length(), 310 *result.parsed_.inner_parsed(), true); 311 } 312 return result; 313} 314 315GURL GURL::GetOrigin() const { 316 // This doesn't make sense for invalid or nonstandard URLs, so return 317 // the empty URL 318 if (!is_valid_ || !IsStandard()) 319 return GURL(); 320 321 if (SchemeIsFileSystem()) 322 return inner_url_->GetOrigin(); 323 324 url_canon::Replacements<char> replacements; 325 replacements.ClearUsername(); 326 replacements.ClearPassword(); 327 replacements.ClearPath(); 328 replacements.ClearQuery(); 329 replacements.ClearRef(); 330 331 return ReplaceComponents(replacements); 332} 333 334GURL GURL::GetWithEmptyPath() const { 335 // This doesn't make sense for invalid or nonstandard URLs, so return 336 // the empty URL. 337 if (!is_valid_ || !IsStandard()) 338 return GURL(); 339 340 // We could optimize this since we know that the URL is canonical, and we are 341 // appending a canonical path, so avoiding re-parsing. 342 GURL other(*this); 343 if (parsed_.path.len == 0) 344 return other; 345 346 // Clear everything after the path. 347 other.parsed_.query.reset(); 348 other.parsed_.ref.reset(); 349 350 // Set the path, since the path is longer than one, we can just set the 351 // first character and resize. 352 other.spec_[other.parsed_.path.begin] = '/'; 353 other.parsed_.path.len = 1; 354 other.spec_.resize(other.parsed_.path.begin + 1); 355 return other; 356} 357 358bool GURL::IsStandard() const { 359 return url_util::IsStandard(spec_.data(), parsed_.scheme); 360} 361 362bool GURL::SchemeIs(const char* lower_ascii_scheme) const { 363 if (parsed_.scheme.len <= 0) 364 return lower_ascii_scheme == NULL; 365 return url_util::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin, 366 spec_.data() + parsed_.scheme.end(), 367 lower_ascii_scheme); 368} 369 370int GURL::IntPort() const { 371 if (parsed_.port.is_nonempty()) 372 return url_parse::ParsePort(spec_.data(), parsed_.port); 373 return url_parse::PORT_UNSPECIFIED; 374} 375 376int GURL::EffectiveIntPort() const { 377 int int_port = IntPort(); 378 if (int_port == url_parse::PORT_UNSPECIFIED && IsStandard()) 379 return url_canon::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin, 380 parsed_.scheme.len); 381 return int_port; 382} 383 384std::string GURL::ExtractFileName() const { 385 url_parse::Component file_component; 386 url_parse::ExtractFileName(spec_.data(), parsed_.path, &file_component); 387 return ComponentString(file_component); 388} 389 390std::string GURL::PathForRequest() const { 391 DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty"; 392 if (parsed_.ref.len >= 0) { 393 // Clip off the reference when it exists. The reference starts after the # 394 // sign, so we have to subtract one to also remove it. 395 return std::string(spec_, parsed_.path.begin, 396 parsed_.ref.begin - parsed_.path.begin - 1); 397 } 398 // Compute the actual path length, rather than depending on the spec's 399 // terminator. If we're an inner_url, our spec continues on into our outer 400 // url's path/query/ref. 401 int path_len = parsed_.path.len; 402 if (parsed_.query.is_valid()) 403 path_len = parsed_.query.end() - parsed_.path.begin; 404 405 return std::string(spec_, parsed_.path.begin, path_len); 406} 407 408std::string GURL::HostNoBrackets() const { 409 // If host looks like an IPv6 literal, strip the square brackets. 410 url_parse::Component h(parsed_.host); 411 if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') { 412 h.begin++; 413 h.len -= 2; 414 } 415 return ComponentString(h); 416} 417 418bool GURL::HostIsIPAddress() const { 419 if (!is_valid_ || spec_.empty()) 420 return false; 421 422 url_canon::RawCanonOutputT<char, 128> ignored_output; 423 url_canon::CanonHostInfo host_info; 424 url_canon::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, 425 &ignored_output, &host_info); 426 return host_info.IsIPAddress(); 427} 428 429#ifdef WIN32 430 431const GURL& GURL::EmptyGURL() { 432 // Avoid static object construction/destruction on startup/shutdown. 433 if (!empty_gurl) { 434 // Create the string. Be careful that we don't break in the case that this 435 // is being called from multiple threads. 436 GURL* new_empty_gurl = new GURL; 437 if (InterlockedCompareExchangePointer( 438 reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) { 439 // The old value was non-NULL, so no replacement was done. Another 440 // thread did the initialization out from under us. 441 delete new_empty_gurl; 442 } 443 } 444 return *empty_gurl; 445} 446 447#else 448 449void EmptyGURLOnce(void) { 450 empty_gurl = new GURL; 451} 452 453const GURL& GURL::EmptyGURL() { 454 // Avoid static object construction/destruction on startup/shutdown. 455 pthread_once(&empty_gurl_once, EmptyGURLOnce); 456 return *empty_gurl; 457} 458 459#endif // WIN32 460 461bool GURL::DomainIs(const char* lower_ascii_domain, 462 int domain_len) const { 463 // Return false if this URL is not valid or domain is empty. 464 if (!is_valid_ || !domain_len) 465 return false; 466 467 // FileSystem URLs have empty parsed_.host, so check this first. 468 if (SchemeIsFileSystem() && inner_url_) 469 return inner_url_->DomainIs(lower_ascii_domain, domain_len); 470 471 if (!parsed_.host.is_nonempty()) 472 return false; 473 474 // Check whether the host name is end with a dot. If yes, treat it 475 // the same as no-dot unless the input comparison domain is end 476 // with dot. 477 const char* last_pos = spec_.data() + parsed_.host.end() - 1; 478 int host_len = parsed_.host.len; 479 if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) { 480 last_pos--; 481 host_len--; 482 } 483 484 // Return false if host's length is less than domain's length. 485 if (host_len < domain_len) 486 return false; 487 488 // Compare this url whether belong specific domain. 489 const char* start_pos = spec_.data() + parsed_.host.begin + 490 host_len - domain_len; 491 492 if (!url_util::LowerCaseEqualsASCII(start_pos, 493 last_pos + 1, 494 lower_ascii_domain, 495 lower_ascii_domain + domain_len)) 496 return false; 497 498 // Check whether host has right domain start with dot, make sure we got 499 // right domain range. For example www.google.com has domain 500 // "google.com" but www.iamnotgoogle.com does not. 501 if ('.' != lower_ascii_domain[0] && host_len > domain_len && 502 '.' != *(start_pos - 1)) 503 return false; 504 505 return true; 506} 507 508void GURL::Swap(GURL* other) { 509 spec_.swap(other->spec_); 510 std::swap(is_valid_, other->is_valid_); 511 std::swap(parsed_, other->parsed_); 512 std::swap(inner_url_, other->inner_url_); 513} 514 515std::ostream& operator<<(std::ostream& out, const GURL& url) { 516 return out << url.possibly_invalid_spec(); 517} 518