url_blacklist_manager.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/policy/core/browser/url_blacklist_manager.h"
6
7#include "base/bind.h"
8#include "base/files/file_path.h"
9#include "base/location.h"
10#include "base/message_loop/message_loop_proxy.h"
11#include "base/prefs/pref_service.h"
12#include "base/sequenced_task_runner.h"
13#include "base/stl_util.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/task_runner_util.h"
16#include "base/values.h"
17#include "components/policy/core/common/policy_pref_names.h"
18#include "components/user_prefs/pref_registry_syncable.h"
19#include "net/base/load_flags.h"
20#include "net/base/net_util.h"
21#include "net/url_request/url_request.h"
22
23using url_matcher::URLMatcher;
24using url_matcher::URLMatcherCondition;
25using url_matcher::URLMatcherConditionFactory;
26using url_matcher::URLMatcherConditionSet;
27using url_matcher::URLMatcherPortFilter;
28using url_matcher::URLMatcherSchemeFilter;
29
30namespace policy {
31
32namespace {
33
34const char kFileScheme[] = "file";
35
// Maximum number of filters read from each policy list. Entries at index
// kMaxFiltersPerPolicy and beyond are ignored.
37const size_t kMaxFiltersPerPolicy = 1000;
38
39// A task that builds the blacklist on a background thread.
40scoped_ptr<URLBlacklist> BuildBlacklist(
41    scoped_ptr<base::ListValue> block,
42    scoped_ptr<base::ListValue> allow,
43    URLBlacklist::SegmentURLCallback segment_url) {
44  scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
45  blacklist->Block(block.get());
46  blacklist->Allow(allow.get());
47  return blacklist.Pass();
48}
49
50}  // namespace
51
52struct URLBlacklist::FilterComponents {
53  FilterComponents() : port(0), match_subdomains(true), allow(true) {}
54  ~FilterComponents() {}
55
56  std::string scheme;
57  std::string host;
58  uint16 port;
59  std::string path;
60  bool match_subdomains;
61  bool allow;
62};
63
64URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
65    : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
66
67URLBlacklist::~URLBlacklist() {}
68
69void URLBlacklist::AddFilters(bool allow,
70                              const base::ListValue* list) {
71  URLMatcherConditionSet::Vector all_conditions;
72  size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
73  for (size_t i = 0; i < size; ++i) {
74    std::string pattern;
75    bool success = list->GetString(i, &pattern);
76    DCHECK(success);
77    FilterComponents components;
78    components.allow = allow;
79    if (!FilterToComponents(segment_url_, pattern, &components.scheme,
80                            &components.host, &components.match_subdomains,
81                            &components.port, &components.path)) {
82      LOG(ERROR) << "Invalid pattern " << pattern;
83      continue;
84    }
85
86    all_conditions.push_back(
87        CreateConditionSet(url_matcher_.get(), ++id_, components.scheme,
88                           components.host, components.match_subdomains,
89                           components.port, components.path));
90    filters_[id_] = components;
91  }
92  url_matcher_->AddConditionSets(all_conditions);
93}
94
95void URLBlacklist::Block(const base::ListValue* filters) {
96  AddFilters(false, filters);
97}
98
99void URLBlacklist::Allow(const base::ListValue* filters) {
100  AddFilters(true, filters);
101}
102
103bool URLBlacklist::IsURLBlocked(const GURL& url) const {
104  std::set<URLMatcherConditionSet::ID> matching_ids =
105      url_matcher_->MatchURL(url);
106
107  const FilterComponents* max = NULL;
108  for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
109       id != matching_ids.end(); ++id) {
110    std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
111    DCHECK(it != filters_.end());
112    const FilterComponents& filter = it->second;
113    if (!max || FilterTakesPrecedence(filter, *max))
114      max = &filter;
115  }
116
117  // Default to allow.
118  if (!max)
119    return false;
120
121  return !max->allow;
122}
123
124size_t URLBlacklist::Size() const {
125  return filters_.size();
126}
127
128// static
129bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
130                                      const std::string& filter,
131                                      std::string* scheme,
132                                      std::string* host,
133                                      bool* match_subdomains,
134                                      uint16* port,
135                                      std::string* path) {
136  url_parse::Parsed parsed;
137
138  if (segment_url(filter, &parsed) == kFileScheme) {
139    base::FilePath file_path;
140    if (!net::FileURLToFilePath(GURL(filter), &file_path))
141      return false;
142
143    *scheme = kFileScheme;
144    host->clear();
145    *match_subdomains = true;
146    *port = 0;
147    // Special path when the |filter| is 'file://*'.
148    *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
149#if defined(FILE_PATH_USES_WIN_SEPARATORS)
150    // Separators have to be canonicalized on Windows.
151    std::replace(path->begin(), path->end(), '\\', '/');
152    *path = "/" + *path;
153#endif
154    return true;
155  }
156
157  if (!parsed.host.is_nonempty())
158    return false;
159
160  if (parsed.scheme.is_nonempty())
161    scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
162  else
163    scheme->clear();
164
165  host->assign(filter, parsed.host.begin, parsed.host.len);
166  // Special '*' host, matches all hosts.
167  if (*host == "*") {
168    host->clear();
169    *match_subdomains = true;
170  } else if ((*host)[0] == '.') {
171    // A leading dot in the pattern syntax means that we don't want to match
172    // subdomains.
173    host->erase(0, 1);
174    *match_subdomains = false;
175  } else {
176    url_canon::RawCanonOutputT<char> output;
177    url_canon::CanonHostInfo host_info;
178    url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host,
179                                       &output, &host_info);
180    if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) {
181      // We want to match subdomains. Add a dot in front to make sure we only
182      // match at domain component boundaries.
183      *host = "." + *host;
184      *match_subdomains = true;
185    } else {
186      *match_subdomains = false;
187    }
188  }
189
190  if (parsed.port.is_nonempty()) {
191    int int_port;
192    if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
193                           &int_port)) {
194      return false;
195    }
196    if (int_port <= 0 || int_port > kuint16max)
197      return false;
198    *port = int_port;
199  } else {
200    // Match any port.
201    *port = 0;
202  }
203
204  if (parsed.path.is_nonempty())
205    path->assign(filter, parsed.path.begin, parsed.path.len);
206  else
207    path->clear();
208
209  return true;
210}
211
212// static
213scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
214    URLMatcher* url_matcher,
215    int id,
216    const std::string& scheme,
217    const std::string& host,
218    bool match_subdomains,
219    uint16 port,
220    const std::string& path) {
221  URLMatcherConditionFactory* condition_factory =
222      url_matcher->condition_factory();
223  std::set<URLMatcherCondition> conditions;
224  conditions.insert(match_subdomains ?
225      condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
226      condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
227
228  scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
229  if (!scheme.empty())
230    scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
231
232  scoped_ptr<URLMatcherPortFilter> port_filter;
233  if (port != 0) {
234    std::vector<URLMatcherPortFilter::Range> ranges;
235    ranges.push_back(URLMatcherPortFilter::CreateRange(port));
236    port_filter.reset(new URLMatcherPortFilter(ranges));
237  }
238
239  return new URLMatcherConditionSet(id, conditions,
240                                    scheme_filter.Pass(), port_filter.Pass());
241}
242
243// static
244bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
245                                         const FilterComponents& rhs) {
246  if (lhs.match_subdomains && !rhs.match_subdomains)
247    return false;
248  if (!lhs.match_subdomains && rhs.match_subdomains)
249    return true;
250
251  size_t host_length = lhs.host.length();
252  size_t other_host_length = rhs.host.length();
253  if (host_length != other_host_length)
254    return host_length > other_host_length;
255
256  size_t path_length = lhs.path.length();
257  size_t other_path_length = rhs.path.length();
258  if (path_length != other_path_length)
259    return path_length > other_path_length;
260
261  if (lhs.allow && !rhs.allow)
262    return true;
263
264  return false;
265}
266
267URLBlacklistManager::URLBlacklistManager(
268    PrefService* pref_service,
269    const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
270    const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
271    URLBlacklist::SegmentURLCallback segment_url,
272    SkipBlacklistCallback skip_blacklist)
273    : ui_weak_ptr_factory_(this),
274      pref_service_(pref_service),
275      background_task_runner_(background_task_runner),
276      io_task_runner_(io_task_runner),
277      segment_url_(segment_url),
278      skip_blacklist_(skip_blacklist),
279      io_weak_ptr_factory_(this),
280      ui_task_runner_(base::MessageLoopProxy::current()),
281      blacklist_(new URLBlacklist(segment_url)) {
282  pref_change_registrar_.Init(pref_service_);
283  base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
284                                      base::Unretained(this));
285  pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
286  pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
287
288  // Start enforcing the policies without a delay when they are present at
289  // startup.
290  if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
291    Update();
292}
293
294void URLBlacklistManager::ShutdownOnUIThread() {
295  DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
296  // Cancel any pending updates, and stop listening for pref change updates.
297  ui_weak_ptr_factory_.InvalidateWeakPtrs();
298  pref_change_registrar_.RemoveAll();
299}
300
301URLBlacklistManager::~URLBlacklistManager() {
302}
303
304void URLBlacklistManager::ScheduleUpdate() {
305  DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
306  // Cancel pending updates, if any. This can happen if two preferences that
307  // change the blacklist are updated in one message loop cycle. In those cases,
308  // only rebuild the blacklist after all the preference updates are processed.
309  ui_weak_ptr_factory_.InvalidateWeakPtrs();
310  ui_task_runner_->PostTask(
311      FROM_HERE,
312      base::Bind(&URLBlacklistManager::Update,
313                 ui_weak_ptr_factory_.GetWeakPtr()));
314}
315
316void URLBlacklistManager::Update() {
317  DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
318
319  // The preferences can only be read on the UI thread.
320  scoped_ptr<base::ListValue> block(
321      pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
322  scoped_ptr<base::ListValue> allow(
323      pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
324
325  // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
326  // here, since this task will always execute before a potential deletion of
327  // ProfileIOData on IO.
328  io_task_runner_->PostTask(FROM_HERE,
329                            base::Bind(&URLBlacklistManager::UpdateOnIO,
330                                       base::Unretained(this),
331                                       base::Passed(&block),
332                                       base::Passed(&allow)));
333}
334
335void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
336                                     scoped_ptr<base::ListValue> allow) {
337  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
338  // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
339  // to the URLBlacklistManager on IO.
340  base::PostTaskAndReplyWithResult(
341      background_task_runner_,
342      FROM_HERE,
343      base::Bind(&BuildBlacklist,
344                 base::Passed(&block),
345                 base::Passed(&allow),
346                 segment_url_),
347      base::Bind(&URLBlacklistManager::SetBlacklist,
348                 io_weak_ptr_factory_.GetWeakPtr()));
349}
350
351void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
352  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
353  blacklist_ = blacklist.Pass();
354}
355
356bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
357  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
358  return blacklist_->IsURLBlocked(url);
359}
360
361bool URLBlacklistManager::IsRequestBlocked(
362    const net::URLRequest& request) const {
363  DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
364#if !defined(OS_IOS)
365  // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
366  int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
367  if ((request.load_flags() & filter_flags) == 0)
368    return false;
369#endif
370
371  if (skip_blacklist_(request.url()))
372    return false;
373
374  return IsURLBlocked(request.url());
375}
376
377// static
378void URLBlacklistManager::RegisterProfilePrefs(
379    user_prefs::PrefRegistrySyncable* registry) {
380  registry->RegisterListPref(policy_prefs::kUrlBlacklist,
381                             user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
382  registry->RegisterListPref(policy_prefs::kUrlWhitelist,
383                             user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
384}
385
386}  // namespace policy
387