url_blacklist_manager.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/policy/core/browser/url_blacklist_manager.h" 6 7#include "base/bind.h" 8#include "base/files/file_path.h" 9#include "base/location.h" 10#include "base/message_loop/message_loop_proxy.h" 11#include "base/prefs/pref_service.h" 12#include "base/sequenced_task_runner.h" 13#include "base/stl_util.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/task_runner_util.h" 16#include "base/values.h" 17#include "components/policy/core/common/policy_pref_names.h" 18#include "components/user_prefs/pref_registry_syncable.h" 19#include "net/base/load_flags.h" 20#include "net/base/net_util.h" 21#include "net/url_request/url_request.h" 22 23using url_matcher::URLMatcher; 24using url_matcher::URLMatcherCondition; 25using url_matcher::URLMatcherConditionFactory; 26using url_matcher::URLMatcherConditionSet; 27using url_matcher::URLMatcherPortFilter; 28using url_matcher::URLMatcherSchemeFilter; 29 30namespace policy { 31 32namespace { 33 34const char kFileScheme[] = "file"; 35 36// Maximum filters per policy. Filters over this index are ignored. 37const size_t kMaxFiltersPerPolicy = 1000; 38 39// A task that builds the blacklist on a background thread. 40scoped_ptr<URLBlacklist> BuildBlacklist( 41 scoped_ptr<base::ListValue> block, 42 scoped_ptr<base::ListValue> allow, 43 URLBlacklist::SegmentURLCallback segment_url) { 44 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url)); 45 blacklist->Block(block.get()); 46 blacklist->Allow(allow.get()); 47 return blacklist.Pass(); 48} 49 50} // namespace 51 52struct URLBlacklist::FilterComponents { 53 FilterComponents() : port(0), match_subdomains(true), allow(true) {} 54 ~FilterComponents() {} 55 56 std::string scheme; 57 std::string host; 58 uint16 port; 59 std::string path; 60 bool match_subdomains; 61 bool allow; 62}; 63 64URLBlacklist::URLBlacklist(SegmentURLCallback segment_url) 65 : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {} 66 67URLBlacklist::~URLBlacklist() {} 68 69void URLBlacklist::AddFilters(bool allow, 70 const base::ListValue* list) { 71 URLMatcherConditionSet::Vector all_conditions; 72 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); 73 for (size_t i = 0; i < size; ++i) { 74 std::string pattern; 75 bool success = list->GetString(i, &pattern); 76 DCHECK(success); 77 FilterComponents components; 78 components.allow = allow; 79 if (!FilterToComponents(segment_url_, pattern, &components.scheme, 80 &components.host, &components.match_subdomains, 81 &components.port, &components.path)) { 82 LOG(ERROR) << "Invalid pattern " << pattern; 83 continue; 84 } 85 86 all_conditions.push_back( 87 CreateConditionSet(url_matcher_.get(), ++id_, components.scheme, 88 components.host, components.match_subdomains, 89 components.port, components.path)); 90 filters_[id_] = components; 91 } 92 url_matcher_->AddConditionSets(all_conditions); 93} 94 95void URLBlacklist::Block(const base::ListValue* filters) { 96 AddFilters(false, filters); 97} 98 99void URLBlacklist::Allow(const base::ListValue* filters) { 100 AddFilters(true, filters); 101} 102 103bool URLBlacklist::IsURLBlocked(const GURL& url) const { 104 std::set<URLMatcherConditionSet::ID> matching_ids = 105 url_matcher_->MatchURL(url); 106 107 const FilterComponents* max = NULL; 108 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin(); 109 id != matching_ids.end(); ++id) { 110 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id); 111 DCHECK(it != filters_.end()); 112 const FilterComponents& filter = it->second; 113 if (!max || FilterTakesPrecedence(filter, *max)) 114 max = &filter; 115 } 116 117 // Default to allow. 118 if (!max) 119 return false; 120 121 return !max->allow; 122} 123 124size_t URLBlacklist::Size() const { 125 return filters_.size(); 126} 127 128// static 129bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url, 130 const std::string& filter, 131 std::string* scheme, 132 std::string* host, 133 bool* match_subdomains, 134 uint16* port, 135 std::string* path) { 136 url_parse::Parsed parsed; 137 138 if (segment_url(filter, &parsed) == kFileScheme) { 139 base::FilePath file_path; 140 if (!net::FileURLToFilePath(GURL(filter), &file_path)) 141 return false; 142 143 *scheme = kFileScheme; 144 host->clear(); 145 *match_subdomains = true; 146 *port = 0; 147 // Special path when the |filter| is 'file://*'. 148 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe(); 149#if defined(FILE_PATH_USES_WIN_SEPARATORS) 150 // Separators have to be canonicalized on Windows. 151 std::replace(path->begin(), path->end(), '\\', '/'); 152 *path = "/" + *path; 153#endif 154 return true; 155 } 156 157 if (!parsed.host.is_nonempty()) 158 return false; 159 160 if (parsed.scheme.is_nonempty()) 161 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len); 162 else 163 scheme->clear(); 164 165 host->assign(filter, parsed.host.begin, parsed.host.len); 166 // Special '*' host, matches all hosts. 167 if (*host == "*") { 168 host->clear(); 169 *match_subdomains = true; 170 } else if ((*host)[0] == '.') { 171 // A leading dot in the pattern syntax means that we don't want to match 172 // subdomains. 173 host->erase(0, 1); 174 *match_subdomains = false; 175 } else { 176 url_canon::RawCanonOutputT<char> output; 177 url_canon::CanonHostInfo host_info; 178 url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host, 179 &output, &host_info); 180 if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) { 181 // We want to match subdomains. Add a dot in front to make sure we only 182 // match at domain component boundaries. 183 *host = "." + *host; 184 *match_subdomains = true; 185 } else { 186 *match_subdomains = false; 187 } 188 } 189 190 if (parsed.port.is_nonempty()) { 191 int int_port; 192 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), 193 &int_port)) { 194 return false; 195 } 196 if (int_port <= 0 || int_port > kuint16max) 197 return false; 198 *port = int_port; 199 } else { 200 // Match any port. 201 *port = 0; 202 } 203 204 if (parsed.path.is_nonempty()) 205 path->assign(filter, parsed.path.begin, parsed.path.len); 206 else 207 path->clear(); 208 209 return true; 210} 211 212// static 213scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( 214 URLMatcher* url_matcher, 215 int id, 216 const std::string& scheme, 217 const std::string& host, 218 bool match_subdomains, 219 uint16 port, 220 const std::string& path) { 221 URLMatcherConditionFactory* condition_factory = 222 url_matcher->condition_factory(); 223 std::set<URLMatcherCondition> conditions; 224 conditions.insert(match_subdomains ? 225 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : 226 condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); 227 228 scoped_ptr<URLMatcherSchemeFilter> scheme_filter; 229 if (!scheme.empty()) 230 scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); 231 232 scoped_ptr<URLMatcherPortFilter> port_filter; 233 if (port != 0) { 234 std::vector<URLMatcherPortFilter::Range> ranges; 235 ranges.push_back(URLMatcherPortFilter::CreateRange(port)); 236 port_filter.reset(new URLMatcherPortFilter(ranges)); 237 } 238 239 return new URLMatcherConditionSet(id, conditions, 240 scheme_filter.Pass(), port_filter.Pass()); 241} 242 243// static 244bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, 245 const FilterComponents& rhs) { 246 if (lhs.match_subdomains && !rhs.match_subdomains) 247 return false; 248 if (!lhs.match_subdomains && rhs.match_subdomains) 249 return true; 250 251 size_t host_length = lhs.host.length(); 252 size_t other_host_length = rhs.host.length(); 253 if (host_length != other_host_length) 254 return host_length > other_host_length; 255 256 size_t path_length = lhs.path.length(); 257 size_t other_path_length = rhs.path.length(); 258 if (path_length != other_path_length) 259 return path_length > other_path_length; 260 261 if (lhs.allow && !rhs.allow) 262 return true; 263 264 return false; 265} 266 267URLBlacklistManager::URLBlacklistManager( 268 PrefService* pref_service, 269 const scoped_refptr<base::SequencedTaskRunner>& background_task_runner, 270 const scoped_refptr<base::SequencedTaskRunner>& io_task_runner, 271 URLBlacklist::SegmentURLCallback segment_url, 272 SkipBlacklistCallback skip_blacklist) 273 : ui_weak_ptr_factory_(this), 274 pref_service_(pref_service), 275 background_task_runner_(background_task_runner), 276 io_task_runner_(io_task_runner), 277 segment_url_(segment_url), 278 skip_blacklist_(skip_blacklist), 279 io_weak_ptr_factory_(this), 280 ui_task_runner_(base::MessageLoopProxy::current()), 281 blacklist_(new URLBlacklist(segment_url)) { 282 pref_change_registrar_.Init(pref_service_); 283 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, 284 base::Unretained(this)); 285 pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback); 286 pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback); 287 288 // Start enforcing the policies without a delay when they are present at 289 // startup. 290 if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist)) 291 Update(); 292} 293 294void URLBlacklistManager::ShutdownOnUIThread() { 295 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 296 // Cancel any pending updates, and stop listening for pref change updates. 297 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 298 pref_change_registrar_.RemoveAll(); 299} 300 301URLBlacklistManager::~URLBlacklistManager() { 302} 303 304void URLBlacklistManager::ScheduleUpdate() { 305 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 306 // Cancel pending updates, if any. This can happen if two preferences that 307 // change the blacklist are updated in one message loop cycle. In those cases, 308 // only rebuild the blacklist after all the preference updates are processed. 309 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 310 ui_task_runner_->PostTask( 311 FROM_HERE, 312 base::Bind(&URLBlacklistManager::Update, 313 ui_weak_ptr_factory_.GetWeakPtr())); 314} 315 316void URLBlacklistManager::Update() { 317 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 318 319 // The preferences can only be read on the UI thread. 320 scoped_ptr<base::ListValue> block( 321 pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy()); 322 scoped_ptr<base::ListValue> allow( 323 pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy()); 324 325 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from 326 // here, since this task will always execute before a potential deletion of 327 // ProfileIOData on IO. 328 io_task_runner_->PostTask(FROM_HERE, 329 base::Bind(&URLBlacklistManager::UpdateOnIO, 330 base::Unretained(this), 331 base::Passed(&block), 332 base::Passed(&allow))); 333} 334 335void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block, 336 scoped_ptr<base::ListValue> allow) { 337 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 338 // The URLBlacklist is built on a worker thread. Once it's ready, it is passed 339 // to the URLBlacklistManager on IO. 340 base::PostTaskAndReplyWithResult( 341 background_task_runner_, 342 FROM_HERE, 343 base::Bind(&BuildBlacklist, 344 base::Passed(&block), 345 base::Passed(&allow), 346 segment_url_), 347 base::Bind(&URLBlacklistManager::SetBlacklist, 348 io_weak_ptr_factory_.GetWeakPtr())); 349} 350 351void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) { 352 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 353 blacklist_ = blacklist.Pass(); 354} 355 356bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { 357 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 358 return blacklist_->IsURLBlocked(url); 359} 360 361bool URLBlacklistManager::IsRequestBlocked( 362 const net::URLRequest& request) const { 363 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 364#if !defined(OS_IOS) 365 // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283 366 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME; 367 if ((request.load_flags() & filter_flags) == 0) 368 return false; 369#endif 370 371 if (skip_blacklist_(request.url())) 372 return false; 373 374 return IsURLBlocked(request.url()); 375} 376 377// static 378void URLBlacklistManager::RegisterProfilePrefs( 379 user_prefs::PrefRegistrySyncable* registry) { 380 registry->RegisterListPref(policy_prefs::kUrlBlacklist, 381 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 382 registry->RegisterListPref(policy_prefs::kUrlWhitelist, 383 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 384} 385 386} // namespace policy 387