url_blacklist_manager.cc revision c5cede9ae108bb15f6b7a8aea21c7e1fefa2834c
1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/policy/core/browser/url_blacklist_manager.h" 6 7#include "base/bind.h" 8#include "base/files/file_path.h" 9#include "base/location.h" 10#include "base/message_loop/message_loop_proxy.h" 11#include "base/prefs/pref_service.h" 12#include "base/sequenced_task_runner.h" 13#include "base/stl_util.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/task_runner_util.h" 16#include "base/values.h" 17#include "components/policy/core/common/policy_pref_names.h" 18#include "components/user_prefs/pref_registry_syncable.h" 19#include "net/base/filename_util.h" 20#include "net/base/load_flags.h" 21#include "net/base/net_errors.h" 22#include "net/url_request/url_request.h" 23 24using url_matcher::URLMatcher; 25using url_matcher::URLMatcherCondition; 26using url_matcher::URLMatcherConditionFactory; 27using url_matcher::URLMatcherConditionSet; 28using url_matcher::URLMatcherPortFilter; 29using url_matcher::URLMatcherSchemeFilter; 30 31namespace policy { 32 33namespace { 34 35const char kFileScheme[] = "file"; 36 37// Maximum filters per policy. Filters over this index are ignored. 38const size_t kMaxFiltersPerPolicy = 1000; 39 40// A task that builds the blacklist on a background thread. 41scoped_ptr<URLBlacklist> BuildBlacklist( 42 scoped_ptr<base::ListValue> block, 43 scoped_ptr<base::ListValue> allow, 44 URLBlacklist::SegmentURLCallback segment_url) { 45 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url)); 46 blacklist->Block(block.get()); 47 blacklist->Allow(allow.get()); 48 return blacklist.Pass(); 49} 50 51} // namespace 52 53struct URLBlacklist::FilterComponents { 54 FilterComponents() : port(0), match_subdomains(true), allow(true) {} 55 ~FilterComponents() {} 56 57 std::string scheme; 58 std::string host; 59 uint16 port; 60 std::string path; 61 bool match_subdomains; 62 bool allow; 63}; 64 65URLBlacklist::URLBlacklist(SegmentURLCallback segment_url) 66 : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {} 67 68URLBlacklist::~URLBlacklist() {} 69 70void URLBlacklist::AddFilters(bool allow, 71 const base::ListValue* list) { 72 URLMatcherConditionSet::Vector all_conditions; 73 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); 74 for (size_t i = 0; i < size; ++i) { 75 std::string pattern; 76 bool success = list->GetString(i, &pattern); 77 DCHECK(success); 78 FilterComponents components; 79 components.allow = allow; 80 if (!FilterToComponents(segment_url_, pattern, &components.scheme, 81 &components.host, &components.match_subdomains, 82 &components.port, &components.path)) { 83 LOG(ERROR) << "Invalid pattern " << pattern; 84 continue; 85 } 86 87 all_conditions.push_back( 88 CreateConditionSet(url_matcher_.get(), ++id_, components.scheme, 89 components.host, components.match_subdomains, 90 components.port, components.path)); 91 filters_[id_] = components; 92 } 93 url_matcher_->AddConditionSets(all_conditions); 94} 95 96void URLBlacklist::Block(const base::ListValue* filters) { 97 AddFilters(false, filters); 98} 99 100void URLBlacklist::Allow(const base::ListValue* filters) { 101 AddFilters(true, filters); 102} 103 104bool URLBlacklist::IsURLBlocked(const GURL& url) const { 105 std::set<URLMatcherConditionSet::ID> matching_ids = 106 url_matcher_->MatchURL(url); 107 108 const FilterComponents* max = NULL; 109 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin(); 110 id != matching_ids.end(); ++id) { 111 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id); 112 DCHECK(it != filters_.end()); 113 const FilterComponents& filter = it->second; 114 if (!max || FilterTakesPrecedence(filter, *max)) 115 max = &filter; 116 } 117 118 // Default to allow. 119 if (!max) 120 return false; 121 122 return !max->allow; 123} 124 125size_t URLBlacklist::Size() const { 126 return filters_.size(); 127} 128 129// static 130bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url, 131 const std::string& filter, 132 std::string* scheme, 133 std::string* host, 134 bool* match_subdomains, 135 uint16* port, 136 std::string* path) { 137 url_parse::Parsed parsed; 138 139 if (segment_url(filter, &parsed) == kFileScheme) { 140 base::FilePath file_path; 141 if (!net::FileURLToFilePath(GURL(filter), &file_path)) 142 return false; 143 144 *scheme = kFileScheme; 145 host->clear(); 146 *match_subdomains = true; 147 *port = 0; 148 // Special path when the |filter| is 'file://*'. 149 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe(); 150#if defined(FILE_PATH_USES_WIN_SEPARATORS) 151 // Separators have to be canonicalized on Windows. 152 std::replace(path->begin(), path->end(), '\\', '/'); 153 *path = "/" + *path; 154#endif 155 return true; 156 } 157 158 if (!parsed.host.is_nonempty()) 159 return false; 160 161 if (parsed.scheme.is_nonempty()) 162 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len); 163 else 164 scheme->clear(); 165 166 host->assign(filter, parsed.host.begin, parsed.host.len); 167 // Special '*' host, matches all hosts. 168 if (*host == "*") { 169 host->clear(); 170 *match_subdomains = true; 171 } else if ((*host)[0] == '.') { 172 // A leading dot in the pattern syntax means that we don't want to match 173 // subdomains. 174 host->erase(0, 1); 175 *match_subdomains = false; 176 } else { 177 url_canon::RawCanonOutputT<char> output; 178 url_canon::CanonHostInfo host_info; 179 url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host, 180 &output, &host_info); 181 if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) { 182 // We want to match subdomains. Add a dot in front to make sure we only 183 // match at domain component boundaries. 184 *host = "." + *host; 185 *match_subdomains = true; 186 } else { 187 *match_subdomains = false; 188 } 189 } 190 191 if (parsed.port.is_nonempty()) { 192 int int_port; 193 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), 194 &int_port)) { 195 return false; 196 } 197 if (int_port <= 0 || int_port > kuint16max) 198 return false; 199 *port = int_port; 200 } else { 201 // Match any port. 202 *port = 0; 203 } 204 205 if (parsed.path.is_nonempty()) 206 path->assign(filter, parsed.path.begin, parsed.path.len); 207 else 208 path->clear(); 209 210 return true; 211} 212 213// static 214scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( 215 URLMatcher* url_matcher, 216 int id, 217 const std::string& scheme, 218 const std::string& host, 219 bool match_subdomains, 220 uint16 port, 221 const std::string& path) { 222 URLMatcherConditionFactory* condition_factory = 223 url_matcher->condition_factory(); 224 std::set<URLMatcherCondition> conditions; 225 conditions.insert(match_subdomains ? 226 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : 227 condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); 228 229 scoped_ptr<URLMatcherSchemeFilter> scheme_filter; 230 if (!scheme.empty()) 231 scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); 232 233 scoped_ptr<URLMatcherPortFilter> port_filter; 234 if (port != 0) { 235 std::vector<URLMatcherPortFilter::Range> ranges; 236 ranges.push_back(URLMatcherPortFilter::CreateRange(port)); 237 port_filter.reset(new URLMatcherPortFilter(ranges)); 238 } 239 240 return new URLMatcherConditionSet(id, conditions, 241 scheme_filter.Pass(), port_filter.Pass()); 242} 243 244// static 245bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, 246 const FilterComponents& rhs) { 247 if (lhs.match_subdomains && !rhs.match_subdomains) 248 return false; 249 if (!lhs.match_subdomains && rhs.match_subdomains) 250 return true; 251 252 size_t host_length = lhs.host.length(); 253 size_t other_host_length = rhs.host.length(); 254 if (host_length != other_host_length) 255 return host_length > other_host_length; 256 257 size_t path_length = lhs.path.length(); 258 size_t other_path_length = rhs.path.length(); 259 if (path_length != other_path_length) 260 return path_length > other_path_length; 261 262 if (lhs.allow && !rhs.allow) 263 return true; 264 265 return false; 266} 267 268URLBlacklistManager::URLBlacklistManager( 269 PrefService* pref_service, 270 const scoped_refptr<base::SequencedTaskRunner>& background_task_runner, 271 const scoped_refptr<base::SequencedTaskRunner>& io_task_runner, 272 URLBlacklist::SegmentURLCallback segment_url, 273 OverrideBlacklistCallback override_blacklist) 274 : ui_weak_ptr_factory_(this), 275 pref_service_(pref_service), 276 background_task_runner_(background_task_runner), 277 io_task_runner_(io_task_runner), 278 segment_url_(segment_url), 279 override_blacklist_(override_blacklist), 280 io_weak_ptr_factory_(this), 281 ui_task_runner_(base::MessageLoopProxy::current()), 282 blacklist_(new URLBlacklist(segment_url)) { 283 pref_change_registrar_.Init(pref_service_); 284 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, 285 base::Unretained(this)); 286 pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback); 287 pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback); 288 289 // Start enforcing the policies without a delay when they are present at 290 // startup. 291 if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist)) 292 Update(); 293} 294 295void URLBlacklistManager::ShutdownOnUIThread() { 296 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 297 // Cancel any pending updates, and stop listening for pref change updates. 298 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 299 pref_change_registrar_.RemoveAll(); 300} 301 302URLBlacklistManager::~URLBlacklistManager() { 303} 304 305void URLBlacklistManager::ScheduleUpdate() { 306 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 307 // Cancel pending updates, if any. This can happen if two preferences that 308 // change the blacklist are updated in one message loop cycle. In those cases, 309 // only rebuild the blacklist after all the preference updates are processed. 310 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 311 ui_task_runner_->PostTask( 312 FROM_HERE, 313 base::Bind(&URLBlacklistManager::Update, 314 ui_weak_ptr_factory_.GetWeakPtr())); 315} 316 317void URLBlacklistManager::Update() { 318 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 319 320 // The preferences can only be read on the UI thread. 321 scoped_ptr<base::ListValue> block( 322 pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy()); 323 scoped_ptr<base::ListValue> allow( 324 pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy()); 325 326 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from 327 // here, since this task will always execute before a potential deletion of 328 // ProfileIOData on IO. 329 io_task_runner_->PostTask(FROM_HERE, 330 base::Bind(&URLBlacklistManager::UpdateOnIO, 331 base::Unretained(this), 332 base::Passed(&block), 333 base::Passed(&allow))); 334} 335 336void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block, 337 scoped_ptr<base::ListValue> allow) { 338 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 339 // The URLBlacklist is built on a worker thread. Once it's ready, it is passed 340 // to the URLBlacklistManager on IO. 341 base::PostTaskAndReplyWithResult( 342 background_task_runner_, 343 FROM_HERE, 344 base::Bind(&BuildBlacklist, 345 base::Passed(&block), 346 base::Passed(&allow), 347 segment_url_), 348 base::Bind(&URLBlacklistManager::SetBlacklist, 349 io_weak_ptr_factory_.GetWeakPtr())); 350} 351 352void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) { 353 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 354 blacklist_ = blacklist.Pass(); 355} 356 357bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { 358 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 359 return blacklist_->IsURLBlocked(url); 360} 361 362bool URLBlacklistManager::IsRequestBlocked( 363 const net::URLRequest& request, int* reason) const { 364 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 365#if !defined(OS_IOS) 366 // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283 367 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME; 368 if ((request.load_flags() & filter_flags) == 0) 369 return false; 370#endif 371 372 bool block = false; 373 if (override_blacklist_.Run(request.url(), &block, reason)) 374 return block; 375 376 *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR; 377 return IsURLBlocked(request.url()); 378} 379 380// static 381void URLBlacklistManager::RegisterProfilePrefs( 382 user_prefs::PrefRegistrySyncable* registry) { 383 registry->RegisterListPref(policy_prefs::kUrlBlacklist, 384 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 385 registry->RegisterListPref(policy_prefs::kUrlWhitelist, 386 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 387} 388 389} // namespace policy 390