url_blacklist_manager.cc revision 1320f92c476a1ad9d19dba2a48c72b75566198e9
1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/policy/core/browser/url_blacklist_manager.h" 6 7#include "base/bind.h" 8#include "base/files/file_path.h" 9#include "base/location.h" 10#include "base/message_loop/message_loop_proxy.h" 11#include "base/prefs/pref_service.h" 12#include "base/sequenced_task_runner.h" 13#include "base/stl_util.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/task_runner_util.h" 16#include "base/values.h" 17#include "components/policy/core/common/policy_pref_names.h" 18#include "components/pref_registry/pref_registry_syncable.h" 19#include "net/base/filename_util.h" 20#include "net/base/load_flags.h" 21#include "net/base/net_errors.h" 22#include "net/url_request/url_request.h" 23#include "url/url_constants.h" 24#include "url/url_parse.h" 25 26using url_matcher::URLMatcher; 27using url_matcher::URLMatcherCondition; 28using url_matcher::URLMatcherConditionFactory; 29using url_matcher::URLMatcherConditionSet; 30using url_matcher::URLMatcherPortFilter; 31using url_matcher::URLMatcherSchemeFilter; 32using url_matcher::URLQueryElementMatcherCondition; 33 34namespace policy { 35 36namespace { 37 38// List of schemes of URLs that should not be blocked by the "*" wildcard in 39// the blacklist. Note that URLs with these schemes can still be blocked with 40// a more specific filter e.g. "chrome-extension://*". 41// The schemes are hardcoded here to avoid dependencies on //extensions and 42// //chrome. 43const char* kBypassBlacklistWildcardForSchemes[] = { 44 // For internal extension URLs e.g. the Bookmark Manager and the File 45 // Manager on Chrome OS. 46 "chrome-extension", 47 48 // NTP on Android. 49 "chrome-native", 50 51 // NTP on other platforms. 52 "chrome-search", 53}; 54 55// Maximum filters per policy. Filters over this index are ignored. 56const size_t kMaxFiltersPerPolicy = 1000; 57 58// A task that builds the blacklist on a background thread. 59scoped_ptr<URLBlacklist> BuildBlacklist( 60 scoped_ptr<base::ListValue> block, 61 scoped_ptr<base::ListValue> allow, 62 URLBlacklist::SegmentURLCallback segment_url) { 63 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url)); 64 blacklist->Block(block.get()); 65 blacklist->Allow(allow.get()); 66 return blacklist.Pass(); 67} 68 69// Tokenise the parameter |query| and add appropriate query element matcher 70// conditions to the |query_conditions|. 71void ProcessQueryToConditions( 72 url_matcher::URLMatcherConditionFactory* condition_factory, 73 const std::string& query, 74 bool allow, 75 std::set<URLQueryElementMatcherCondition>* query_conditions) { 76 url::Component query_left = url::MakeRange(0, query.length()); 77 url::Component key; 78 url::Component value; 79 // Depending on the filter type being black-list or white-list, the matcher 80 // choose any or every match. The idea is a URL should be black-listed if 81 // there is any occurrence of the key value pair. It should be white-listed 82 // only if every occurrence of the key is followed by the value. This avoids 83 // situations such as a user appending a white-listed video parameter in the 84 // end of the query and watching a video of his choice (the last parameter is 85 // ignored by some web servers like youtube's). 86 URLQueryElementMatcherCondition::Type match_type = 87 allow ? URLQueryElementMatcherCondition::MATCH_ALL 88 : URLQueryElementMatcherCondition::MATCH_ANY; 89 90 while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) { 91 URLQueryElementMatcherCondition::QueryElementType query_element_type = 92 value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE 93 : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY; 94 URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type; 95 if (!value.len && key.len && query[key.end() - 1] == '*') { 96 --key.len; 97 query_value_match_type = 98 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; 99 } else if (value.len && query[value.end() - 1] == '*') { 100 --value.len; 101 query_value_match_type = 102 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; 103 } else { 104 query_value_match_type = 105 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT; 106 } 107 query_conditions->insert( 108 URLQueryElementMatcherCondition(query.substr(key.begin, key.len), 109 query.substr(value.begin, value.len), 110 query_value_match_type, 111 query_element_type, 112 match_type, 113 condition_factory)); 114 } 115} 116 117bool BypassBlacklistWildcardForURL(const GURL& url) { 118 const std::string& scheme = url.scheme(); 119 for (size_t i = 0; i < arraysize(kBypassBlacklistWildcardForSchemes); ++i) { 120 if (scheme == kBypassBlacklistWildcardForSchemes[i]) 121 return true; 122 } 123 return false; 124} 125 126} // namespace 127 128struct URLBlacklist::FilterComponents { 129 FilterComponents() : port(0), match_subdomains(true), allow(true) {} 130 ~FilterComponents() {} 131 132 // Returns true if |this| represents the "*" filter in the blacklist. 133 bool IsBlacklistWildcard() const { 134 return !allow && host.empty() && scheme.empty() && path.empty() && 135 query.empty() && port == 0 && number_of_key_value_pairs == 0 && 136 match_subdomains; 137 } 138 139 std::string scheme; 140 std::string host; 141 uint16 port; 142 std::string path; 143 std::string query; 144 int number_of_key_value_pairs; 145 bool match_subdomains; 146 bool allow; 147}; 148 149URLBlacklist::URLBlacklist(SegmentURLCallback segment_url) 150 : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {} 151 152URLBlacklist::~URLBlacklist() {} 153 154void URLBlacklist::AddFilters(bool allow, 155 const base::ListValue* list) { 156 URLMatcherConditionSet::Vector all_conditions; 157 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); 158 for (size_t i = 0; i < size; ++i) { 159 std::string pattern; 160 bool success = list->GetString(i, &pattern); 161 DCHECK(success); 162 FilterComponents components; 163 components.allow = allow; 164 if (!FilterToComponents(segment_url_, 165 pattern, 166 &components.scheme, 167 &components.host, 168 &components.match_subdomains, 169 &components.port, 170 &components.path, 171 &components.query)) { 172 LOG(ERROR) << "Invalid pattern " << pattern; 173 continue; 174 } 175 176 scoped_refptr<URLMatcherConditionSet> condition_set = 177 CreateConditionSet(url_matcher_.get(), 178 ++id_, 179 components.scheme, 180 components.host, 181 components.match_subdomains, 182 components.port, 183 components.path, 184 components.query, 185 allow); 186 components.number_of_key_value_pairs = 187 condition_set->query_conditions().size(); 188 all_conditions.push_back(condition_set); 189 filters_[id_] = components; 190 } 191 url_matcher_->AddConditionSets(all_conditions); 192} 193 194void URLBlacklist::Block(const base::ListValue* filters) { 195 AddFilters(false, filters); 196} 197 198void URLBlacklist::Allow(const base::ListValue* filters) { 199 AddFilters(true, filters); 200} 201 202bool URLBlacklist::IsURLBlocked(const GURL& url) const { 203 std::set<URLMatcherConditionSet::ID> matching_ids = 204 url_matcher_->MatchURL(url); 205 206 const FilterComponents* max = NULL; 207 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin(); 208 id != matching_ids.end(); ++id) { 209 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id); 210 DCHECK(it != filters_.end()); 211 const FilterComponents& filter = it->second; 212 if (!max || FilterTakesPrecedence(filter, *max)) 213 max = &filter; 214 } 215 216 // Default to allow. 217 if (!max) 218 return false; 219 220 // Some of the internal Chrome URLs are not affected by the "*" in the 221 // blacklist. Note that the "*" is the lowest priority filter possible, so 222 // any higher priority filter will be applied first. 223 if (max->IsBlacklistWildcard() && BypassBlacklistWildcardForURL(url)) 224 return false; 225 226 return !max->allow; 227} 228 229size_t URLBlacklist::Size() const { 230 return filters_.size(); 231} 232 233// static 234bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url, 235 const std::string& filter, 236 std::string* scheme, 237 std::string* host, 238 bool* match_subdomains, 239 uint16* port, 240 std::string* path, 241 std::string* query) { 242 url::Parsed parsed; 243 244 if (segment_url(filter, &parsed) == url::kFileScheme) { 245 base::FilePath file_path; 246 if (!net::FileURLToFilePath(GURL(filter), &file_path)) 247 return false; 248 249 *scheme = url::kFileScheme; 250 host->clear(); 251 *match_subdomains = true; 252 *port = 0; 253 // Special path when the |filter| is 'file://*'. 254 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe(); 255#if defined(FILE_PATH_USES_WIN_SEPARATORS) 256 // Separators have to be canonicalized on Windows. 257 std::replace(path->begin(), path->end(), '\\', '/'); 258 *path = "/" + *path; 259#endif 260 return true; 261 } 262 263 if (!parsed.host.is_nonempty()) 264 return false; 265 266 if (parsed.scheme.is_nonempty()) 267 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len); 268 else 269 scheme->clear(); 270 271 host->assign(filter, parsed.host.begin, parsed.host.len); 272 // Special '*' host, matches all hosts. 273 if (*host == "*") { 274 host->clear(); 275 *match_subdomains = true; 276 } else if ((*host)[0] == '.') { 277 // A leading dot in the pattern syntax means that we don't want to match 278 // subdomains. 279 host->erase(0, 1); 280 *match_subdomains = false; 281 } else { 282 url::RawCanonOutputT<char> output; 283 url::CanonHostInfo host_info; 284 url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output, 285 &host_info); 286 if (host_info.family == url::CanonHostInfo::NEUTRAL) { 287 // We want to match subdomains. Add a dot in front to make sure we only 288 // match at domain component boundaries. 289 *host = "." + *host; 290 *match_subdomains = true; 291 } else { 292 *match_subdomains = false; 293 } 294 } 295 296 if (parsed.port.is_nonempty()) { 297 int int_port; 298 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), 299 &int_port)) { 300 return false; 301 } 302 if (int_port <= 0 || int_port > kuint16max) 303 return false; 304 *port = int_port; 305 } else { 306 // Match any port. 307 *port = 0; 308 } 309 310 if (parsed.path.is_nonempty()) 311 path->assign(filter, parsed.path.begin, parsed.path.len); 312 else 313 path->clear(); 314 315 if (query) { 316 if (parsed.query.is_nonempty()) 317 query->assign(filter, parsed.query.begin, parsed.query.len); 318 else 319 query->clear(); 320 } 321 322 return true; 323} 324 325// static 326scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( 327 URLMatcher* url_matcher, 328 int id, 329 const std::string& scheme, 330 const std::string& host, 331 bool match_subdomains, 332 uint16 port, 333 const std::string& path, 334 const std::string& query, 335 bool allow) { 336 URLMatcherConditionFactory* condition_factory = 337 url_matcher->condition_factory(); 338 std::set<URLMatcherCondition> conditions; 339 conditions.insert(match_subdomains ? 340 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : 341 condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); 342 343 std::set<URLQueryElementMatcherCondition> query_conditions; 344 if (!query.empty()) { 345 ProcessQueryToConditions( 346 condition_factory, query, allow, &query_conditions); 347 } 348 349 scoped_ptr<URLMatcherSchemeFilter> scheme_filter; 350 if (!scheme.empty()) 351 scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); 352 353 scoped_ptr<URLMatcherPortFilter> port_filter; 354 if (port != 0) { 355 std::vector<URLMatcherPortFilter::Range> ranges; 356 ranges.push_back(URLMatcherPortFilter::CreateRange(port)); 357 port_filter.reset(new URLMatcherPortFilter(ranges)); 358 } 359 360 return new URLMatcherConditionSet(id, 361 conditions, 362 query_conditions, 363 scheme_filter.Pass(), 364 port_filter.Pass()); 365} 366 367// static 368bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, 369 const FilterComponents& rhs) { 370 // The "*" wildcard is the lowest priority filter. 371 if (rhs.IsBlacklistWildcard()) 372 return true; 373 374 if (lhs.match_subdomains && !rhs.match_subdomains) 375 return false; 376 if (!lhs.match_subdomains && rhs.match_subdomains) 377 return true; 378 379 size_t host_length = lhs.host.length(); 380 size_t other_host_length = rhs.host.length(); 381 if (host_length != other_host_length) 382 return host_length > other_host_length; 383 384 size_t path_length = lhs.path.length(); 385 size_t other_path_length = rhs.path.length(); 386 if (path_length != other_path_length) 387 return path_length > other_path_length; 388 389 if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs) 390 return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs; 391 392 if (lhs.allow && !rhs.allow) 393 return true; 394 395 return false; 396} 397 398URLBlacklistManager::URLBlacklistManager( 399 PrefService* pref_service, 400 const scoped_refptr<base::SequencedTaskRunner>& background_task_runner, 401 const scoped_refptr<base::SequencedTaskRunner>& io_task_runner, 402 URLBlacklist::SegmentURLCallback segment_url, 403 OverrideBlacklistCallback override_blacklist) 404 : pref_service_(pref_service), 405 background_task_runner_(background_task_runner), 406 io_task_runner_(io_task_runner), 407 segment_url_(segment_url), 408 override_blacklist_(override_blacklist), 409 ui_task_runner_(base::MessageLoopProxy::current()), 410 blacklist_(new URLBlacklist(segment_url)), 411 ui_weak_ptr_factory_(this), 412 io_weak_ptr_factory_(this) { 413 pref_change_registrar_.Init(pref_service_); 414 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, 415 base::Unretained(this)); 416 pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback); 417 pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback); 418 419 // Start enforcing the policies without a delay when they are present at 420 // startup. 421 if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist)) 422 Update(); 423} 424 425void URLBlacklistManager::ShutdownOnUIThread() { 426 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 427 // Cancel any pending updates, and stop listening for pref change updates. 428 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 429 pref_change_registrar_.RemoveAll(); 430} 431 432URLBlacklistManager::~URLBlacklistManager() { 433} 434 435void URLBlacklistManager::ScheduleUpdate() { 436 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 437 // Cancel pending updates, if any. This can happen if two preferences that 438 // change the blacklist are updated in one message loop cycle. In those cases, 439 // only rebuild the blacklist after all the preference updates are processed. 440 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 441 ui_task_runner_->PostTask( 442 FROM_HERE, 443 base::Bind(&URLBlacklistManager::Update, 444 ui_weak_ptr_factory_.GetWeakPtr())); 445} 446 447void URLBlacklistManager::Update() { 448 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 449 450 // The preferences can only be read on the UI thread. 451 scoped_ptr<base::ListValue> block( 452 pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy()); 453 scoped_ptr<base::ListValue> allow( 454 pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy()); 455 456 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from 457 // here, since this task will always execute before a potential deletion of 458 // ProfileIOData on IO. 459 io_task_runner_->PostTask(FROM_HERE, 460 base::Bind(&URLBlacklistManager::UpdateOnIO, 461 base::Unretained(this), 462 base::Passed(&block), 463 base::Passed(&allow))); 464} 465 466void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block, 467 scoped_ptr<base::ListValue> allow) { 468 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 469 // The URLBlacklist is built on a worker thread. Once it's ready, it is passed 470 // to the URLBlacklistManager on IO. 471 base::PostTaskAndReplyWithResult( 472 background_task_runner_.get(), 473 FROM_HERE, 474 base::Bind(&BuildBlacklist, 475 base::Passed(&block), 476 base::Passed(&allow), 477 segment_url_), 478 base::Bind(&URLBlacklistManager::SetBlacklist, 479 io_weak_ptr_factory_.GetWeakPtr())); 480} 481 482void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) { 483 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 484 blacklist_ = blacklist.Pass(); 485} 486 487bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { 488 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 489 return blacklist_->IsURLBlocked(url); 490} 491 492bool URLBlacklistManager::IsRequestBlocked( 493 const net::URLRequest& request, int* reason) const { 494 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 495#if !defined(OS_IOS) 496 // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283 497 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME; 498 if ((request.load_flags() & filter_flags) == 0) 499 return false; 500#endif 501 502 bool block = false; 503 if (override_blacklist_.Run(request.url(), &block, reason)) 504 return block; 505 506 *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR; 507 return IsURLBlocked(request.url()); 508} 509 510// static 511void URLBlacklistManager::RegisterProfilePrefs( 512 user_prefs::PrefRegistrySyncable* registry) { 513 registry->RegisterListPref(policy_prefs::kUrlBlacklist, 514 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 515 registry->RegisterListPref(policy_prefs::kUrlWhitelist, 516 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 517} 518 519} // namespace policy 520