// url_blacklist_manager.cc revision c5cede9ae108bb15f6b7a8aea21c7e1fefa2834c
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/policy/core/browser/url_blacklist_manager.h"
6
7#include "base/bind.h"
8#include "base/files/file_path.h"
9#include "base/location.h"
10#include "base/message_loop/message_loop_proxy.h"
11#include "base/prefs/pref_service.h"
12#include "base/sequenced_task_runner.h"
13#include "base/stl_util.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/task_runner_util.h"
16#include "base/values.h"
17#include "components/policy/core/common/policy_pref_names.h"
18#include "components/user_prefs/pref_registry_syncable.h"
19#include "net/base/filename_util.h"
20#include "net/base/load_flags.h"
21#include "net/base/net_errors.h"
22#include "net/url_request/url_request.h"
23
24using url_matcher::URLMatcher;
25using url_matcher::URLMatcherCondition;
26using url_matcher::URLMatcherConditionFactory;
27using url_matcher::URLMatcherConditionSet;
28using url_matcher::URLMatcherPortFilter;
29using url_matcher::URLMatcherSchemeFilter;
30
31namespace policy {
32
33namespace {
34
// Scheme string that selects the file-URL parsing path for a filter and is
// stored as the scheme of such filters.
const char kFileScheme[] = "file";

// Upper bound on the number of filters read from one policy list. Entries at
// or beyond this index are ignored.
const size_t kMaxFiltersPerPolicy = 1000;
39
40// A task that builds the blacklist on a background thread.
41scoped_ptr<URLBlacklist> BuildBlacklist(
42    scoped_ptr<base::ListValue> block,
43    scoped_ptr<base::ListValue> allow,
44    URLBlacklist::SegmentURLCallback segment_url) {
45  scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
46  blacklist->Block(block.get());
47  blacklist->Allow(allow.get());
48  return blacklist.Pass();
49}
50
51}  // namespace
52
53struct URLBlacklist::FilterComponents {
54  FilterComponents() : port(0), match_subdomains(true), allow(true) {}
55  ~FilterComponents() {}
56
57  std::string scheme;
58  std::string host;
59  uint16 port;
60  std::string path;
61  bool match_subdomains;
62  bool allow;
63};
64
65URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
66    : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
67
68URLBlacklist::~URLBlacklist() {}
69
70void URLBlacklist::AddFilters(bool allow,
71                              const base::ListValue* list) {
72  URLMatcherConditionSet::Vector all_conditions;
73  size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
74  for (size_t i = 0; i < size; ++i) {
75    std::string pattern;
76    bool success = list->GetString(i, &pattern);
77    DCHECK(success);
78    FilterComponents components;
79    components.allow = allow;
80    if (!FilterToComponents(segment_url_, pattern, &components.scheme,
81                            &components.host, &components.match_subdomains,
82                            &components.port, &components.path)) {
83      LOG(ERROR) << "Invalid pattern " << pattern;
84      continue;
85    }
86
87    all_conditions.push_back(
88        CreateConditionSet(url_matcher_.get(), ++id_, components.scheme,
89                           components.host, components.match_subdomains,
90                           components.port, components.path));
91    filters_[id_] = components;
92  }
93  url_matcher_->AddConditionSets(all_conditions);
94}
95
96void URLBlacklist::Block(const base::ListValue* filters) {
97  AddFilters(false, filters);
98}
99
100void URLBlacklist::Allow(const base::ListValue* filters) {
101  AddFilters(true, filters);
102}
103
104bool URLBlacklist::IsURLBlocked(const GURL& url) const {
105  std::set<URLMatcherConditionSet::ID> matching_ids =
106      url_matcher_->MatchURL(url);
107
108  const FilterComponents* max = NULL;
109  for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
110       id != matching_ids.end(); ++id) {
111    std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
112    DCHECK(it != filters_.end());
113    const FilterComponents& filter = it->second;
114    if (!max || FilterTakesPrecedence(filter, *max))
115      max = &filter;
116  }
117
118  // Default to allow.
119  if (!max)
120    return false;
121
122  return !max->allow;
123}
124
125size_t URLBlacklist::Size() const {
126  return filters_.size();
127}
128
129// static
130bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
131                                      const std::string& filter,
132                                      std::string* scheme,
133                                      std::string* host,
134                                      bool* match_subdomains,
135                                      uint16* port,
136                                      std::string* path) {
137  url_parse::Parsed parsed;
138
139  if (segment_url(filter, &parsed) == kFileScheme) {
140    base::FilePath file_path;
141    if (!net::FileURLToFilePath(GURL(filter), &file_path))
142      return false;
143
144    *scheme = kFileScheme;
145    host->clear();
146    *match_subdomains = true;
147    *port = 0;
148    // Special path when the |filter| is 'file://*'.
149    *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
150#if defined(FILE_PATH_USES_WIN_SEPARATORS)
151    // Separators have to be canonicalized on Windows.
152    std::replace(path->begin(), path->end(), '\\', '/');
153    *path = "/" + *path;
154#endif
155    return true;
156  }
157
158  if (!parsed.host.is_nonempty())
159    return false;
160
161  if (parsed.scheme.is_nonempty())
162    scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
163  else
164    scheme->clear();
165
166  host->assign(filter, parsed.host.begin, parsed.host.len);
167  // Special '*' host, matches all hosts.
168  if (*host == "*") {
169    host->clear();
170    *match_subdomains = true;
171  } else if ((*host)[0] == '.') {
172    // A leading dot in the pattern syntax means that we don't want to match
173    // subdomains.
174    host->erase(0, 1);
175    *match_subdomains = false;
176  } else {
177    url_canon::RawCanonOutputT<char> output;
178    url_canon::CanonHostInfo host_info;
179    url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host,
180                                       &output, &host_info);
181    if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) {
182      // We want to match subdomains. Add a dot in front to make sure we only
183      // match at domain component boundaries.
184      *host = "." + *host;
185      *match_subdomains = true;
186    } else {
187      *match_subdomains = false;
188    }
189  }
190
191  if (parsed.port.is_nonempty()) {
192    int int_port;
193    if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
194                           &int_port)) {
195      return false;
196    }
197    if (int_port <= 0 || int_port > kuint16max)
198      return false;
199    *port = int_port;
200  } else {
201    // Match any port.
202    *port = 0;
203  }
204
205  if (parsed.path.is_nonempty())
206    path->assign(filter, parsed.path.begin, parsed.path.len);
207  else
208    path->clear();
209
210  return true;
211}
212
213// static
214scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
215    URLMatcher* url_matcher,
216    int id,
217    const std::string& scheme,
218    const std::string& host,
219    bool match_subdomains,
220    uint16 port,
221    const std::string& path) {
222  URLMatcherConditionFactory* condition_factory =
223      url_matcher->condition_factory();
224  std::set<URLMatcherCondition> conditions;
225  conditions.insert(match_subdomains ?
226      condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
227      condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
228
229  scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
230  if (!scheme.empty())
231    scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
232
233  scoped_ptr<URLMatcherPortFilter> port_filter;
234  if (port != 0) {
235    std::vector<URLMatcherPortFilter::Range> ranges;
236    ranges.push_back(URLMatcherPortFilter::CreateRange(port));
237    port_filter.reset(new URLMatcherPortFilter(ranges));
238  }
239
240  return new URLMatcherConditionSet(id, conditions,
241                                    scheme_filter.Pass(), port_filter.Pass());
242}
243
244// static
245bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
246                                         const FilterComponents& rhs) {
247  if (lhs.match_subdomains && !rhs.match_subdomains)
248    return false;
249  if (!lhs.match_subdomains && rhs.match_subdomains)
250    return true;
251
252  size_t host_length = lhs.host.length();
253  size_t other_host_length = rhs.host.length();
254  if (host_length != other_host_length)
255    return host_length > other_host_length;
256
257  size_t path_length = lhs.path.length();
258  size_t other_path_length = rhs.path.length();
259  if (path_length != other_path_length)
260    return path_length > other_path_length;
261
262  if (lhs.allow && !rhs.allow)
263    return true;
264
265  return false;
266}
267
268URLBlacklistManager::URLBlacklistManager(
269    PrefService* pref_service,
270    const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
271    const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
272    URLBlacklist::SegmentURLCallback segment_url,
273    OverrideBlacklistCallback override_blacklist)
274    : ui_weak_ptr_factory_(this),
275      pref_service_(pref_service),
276      background_task_runner_(background_task_runner),
277      io_task_runner_(io_task_runner),
278      segment_url_(segment_url),
279      override_blacklist_(override_blacklist),
280      io_weak_ptr_factory_(this),
281      ui_task_runner_(base::MessageLoopProxy::current()),
282      blacklist_(new URLBlacklist(segment_url)) {
283  pref_change_registrar_.Init(pref_service_);
284  base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
285                                      base::Unretained(this));
286  pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
287  pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
288
289  // Start enforcing the policies without a delay when they are present at
290  // startup.
291  if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
292    Update();
293}
294
295void URLBlacklistManager::ShutdownOnUIThread() {
296  DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
297  // Cancel any pending updates, and stop listening for pref change updates.
298  ui_weak_ptr_factory_.InvalidateWeakPtrs();
299  pref_change_registrar_.RemoveAll();
300}
301
302URLBlacklistManager::~URLBlacklistManager() {
303}
304
305void URLBlacklistManager::ScheduleUpdate() {
306  DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
307  // Cancel pending updates, if any. This can happen if two preferences that
308  // change the blacklist are updated in one message loop cycle. In those cases,
309  // only rebuild the blacklist after all the preference updates are processed.
310  ui_weak_ptr_factory_.InvalidateWeakPtrs();
311  ui_task_runner_->PostTask(
312      FROM_HERE,
313      base::Bind(&URLBlacklistManager::Update,
314                 ui_weak_ptr_factory_.GetWeakPtr()));
315}
316
317void URLBlacklistManager::Update() {
318  DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
319
320  // The preferences can only be read on the UI thread.
321  scoped_ptr<base::ListValue> block(
322      pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
323  scoped_ptr<base::ListValue> allow(
324      pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
325
326  // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
327  // here, since this task will always execute before a potential deletion of
328  // ProfileIOData on IO.
329  io_task_runner_->PostTask(FROM_HERE,
330                            base::Bind(&URLBlacklistManager::UpdateOnIO,
331                                       base::Unretained(this),
332                                       base::Passed(&block),
333                                       base::Passed(&allow)));
334}
335
336void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
337                                     scoped_ptr<base::ListValue> allow) {
338  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
339  // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
340  // to the URLBlacklistManager on IO.
341  base::PostTaskAndReplyWithResult(
342      background_task_runner_,
343      FROM_HERE,
344      base::Bind(&BuildBlacklist,
345                 base::Passed(&block),
346                 base::Passed(&allow),
347                 segment_url_),
348      base::Bind(&URLBlacklistManager::SetBlacklist,
349                 io_weak_ptr_factory_.GetWeakPtr()));
350}
351
352void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
353  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
354  blacklist_ = blacklist.Pass();
355}
356
357bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
358  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
359  return blacklist_->IsURLBlocked(url);
360}
361
362bool URLBlacklistManager::IsRequestBlocked(
363    const net::URLRequest& request, int* reason) const {
364  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
365#if !defined(OS_IOS)
366  // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
367  int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
368  if ((request.load_flags() & filter_flags) == 0)
369    return false;
370#endif
371
372  bool block = false;
373  if (override_blacklist_.Run(request.url(), &block, reason))
374    return block;
375
376  *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR;
377  return IsURLBlocked(request.url());
378}
379
380// static
381void URLBlacklistManager::RegisterProfilePrefs(
382    user_prefs::PrefRegistrySyncable* registry) {
383  registry->RegisterListPref(policy_prefs::kUrlBlacklist,
384                             user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
385  registry->RegisterListPref(policy_prefs::kUrlWhitelist,
386                             user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
387}
388
389}  // namespace policy
390